mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-05-24 20:26:32 +03:00
Compare commits
56 Commits
v1.122.4
...
timerpool-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7ea70b8f57 | ||
|
|
676a88793a | ||
|
|
8d3e9d1dac | ||
|
|
09251f0a1e | ||
|
|
4ea5f8a84d | ||
|
|
cd52978096 | ||
|
|
f65e24b2ab | ||
|
|
0579e68409 | ||
|
|
f2aea8532f | ||
|
|
94473ed262 | ||
|
|
c646a66b60 | ||
|
|
ccf97a4143 | ||
|
|
66df8a5003 | ||
|
|
cea9505bab | ||
|
|
30ac8cd3fa | ||
|
|
a1f0b792af | ||
|
|
50f75d751f | ||
|
|
27f7bc81e0 | ||
|
|
90d23d7c9f | ||
|
|
f68c028673 | ||
|
|
f24bf391a4 | ||
|
|
bc64ecfa3d | ||
|
|
f0bbf6ec15 | ||
|
|
cff4bde4d6 | ||
|
|
1716f11677 | ||
|
|
b4932ed2da | ||
|
|
77f2ab139f | ||
|
|
5537140074 | ||
|
|
5d766bf7f1 | ||
|
|
5907239181 | ||
|
|
720c2bfa1d | ||
|
|
e971e6102e | ||
|
|
5cd6d7cfba | ||
|
|
907aa1973a | ||
|
|
d6dacd9771 | ||
|
|
5bb67a7f00 | ||
|
|
8c1c92d4c9 | ||
|
|
95ca45d05a | ||
|
|
828a2aaf17 | ||
|
|
007ae5a3f0 | ||
|
|
dcd23da4ba | ||
|
|
e33dbaf3d2 | ||
|
|
c68973a247 | ||
|
|
2c72ef0f38 | ||
|
|
bd0551da3b | ||
|
|
9f52c40b0b | ||
|
|
ba3b50df1d | ||
|
|
3cfeae7f1a | ||
|
|
32da04725b | ||
|
|
8ce4636bc0 | ||
|
|
6167ce655e | ||
|
|
f1e294aa2b | ||
|
|
b72bf6961d | ||
|
|
2b880fe7db | ||
|
|
9898743fbd | ||
|
|
ca372168ae |
@@ -31,7 +31,7 @@ type Group struct {
|
||||
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
|
||||
EvalDelay *promutil.Duration `yaml:"eval_delay,omitempty"`
|
||||
Limit int `yaml:"limit,omitempty"`
|
||||
Limit *int `yaml:"limit,omitempty"`
|
||||
Rules []Rule `yaml:"rules"`
|
||||
Concurrency int `yaml:"concurrency"`
|
||||
// Labels is a set of label value pairs, that will be added to every rule.
|
||||
@@ -91,8 +91,8 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
|
||||
if g.EvalOffset != nil && g.EvalDelay != nil {
|
||||
return fmt.Errorf("eval_offset cannot be used with eval_delay")
|
||||
}
|
||||
if g.Limit < 0 {
|
||||
return fmt.Errorf("invalid limit %d, shouldn't be less than 0", g.Limit)
|
||||
if g.Limit != nil && *g.Limit < 0 {
|
||||
return fmt.Errorf("invalid limit %d, shouldn't be less than 0", *g.Limit)
|
||||
}
|
||||
if g.Concurrency < 0 {
|
||||
return fmt.Errorf("invalid concurrency %d, shouldn't be less than 0", g.Concurrency)
|
||||
|
||||
@@ -181,9 +181,10 @@ func TestGroupValidate_Failure(t *testing.T) {
|
||||
EvalOffset: promutil.NewDuration(2 * time.Minute),
|
||||
}, false, "eval_offset should be smaller than interval")
|
||||
|
||||
limit := -1
|
||||
f(&Group{
|
||||
Name: "wrong limit",
|
||||
Limit: -1,
|
||||
Limit: &limit,
|
||||
}, false, "invalid limit")
|
||||
|
||||
f(&Group{
|
||||
|
||||
@@ -24,6 +24,10 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
ruleResultsLimit = flag.Int("rule.resultsLimit", 0, "Limits the number of alerts or recording results a single rule can produce. "+
|
||||
"Can be overridden by the limit option under group if specified. "+
|
||||
"If exceeded, the rule will be marked with an error and all its results will be discarded. "+
|
||||
"0 means no limit.")
|
||||
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
|
||||
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
|
||||
resendDelay = flag.Duration("rule.resendDelay", 0, "MiniMum amount of time to wait before resending an alert to notifier.")
|
||||
@@ -111,7 +115,6 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
|
||||
Name: cfg.Name,
|
||||
File: cfg.File,
|
||||
Interval: cfg.Interval.Duration(),
|
||||
Limit: cfg.Limit,
|
||||
Concurrency: cfg.Concurrency,
|
||||
checksum: cfg.Checksum,
|
||||
Params: cfg.Params,
|
||||
@@ -128,6 +131,11 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
|
||||
if g.Interval == 0 {
|
||||
g.Interval = defaultInterval
|
||||
}
|
||||
if cfg.Limit != nil {
|
||||
g.Limit = *cfg.Limit
|
||||
} else {
|
||||
g.Limit = *ruleResultsLimit
|
||||
}
|
||||
if g.Concurrency < 1 {
|
||||
g.Concurrency = 1
|
||||
}
|
||||
|
||||
@@ -372,20 +372,54 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
||||
updateHeadersByConfig(w.Header(), hc.ResponseHeaders)
|
||||
w.WriteHeader(res.StatusCode)
|
||||
|
||||
copyBuf := copyBufPool.Get()
|
||||
copyBuf.B = bytesutil.ResizeNoCopyNoOverallocate(copyBuf.B, 16*1024)
|
||||
_, err = io.CopyBuffer(w, res.Body, copyBuf.B)
|
||||
copyBufPool.Put(copyBuf)
|
||||
err = copyStreamToClient(w, res.Body)
|
||||
_ = res.Body.Close()
|
||||
if err != nil && !netutil.IsTrivialNetworkError(err) {
|
||||
if err != nil && !netutil.IsTrivialNetworkError(err) && !errors.Is(err, context.Canceled) {
|
||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||
requestURI := httpserver.GetRequestURI(r)
|
||||
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying response body from %s: %s", remoteAddr, requestURI, targetURL, err)
|
||||
return true, false
|
||||
}
|
||||
return true, false
|
||||
}
|
||||
|
||||
func copyStreamToClient(client io.Writer, backend io.Reader) error {
|
||||
copyBuf := copyBufPool.Get()
|
||||
copyBuf.B = bytesutil.ResizeNoCopyNoOverallocate(copyBuf.B, 16*1024)
|
||||
defer copyBufPool.Put(copyBuf)
|
||||
buf := copyBuf.B
|
||||
|
||||
flusher, ok := client.(http.Flusher)
|
||||
if !ok {
|
||||
logger.Panicf("BUG: client must implement net/http.Flusher interface; got %T", client)
|
||||
}
|
||||
|
||||
for {
|
||||
n, backendErr := backend.Read(buf)
|
||||
if n > 0 {
|
||||
data := buf[:n]
|
||||
n, clientErr := client.Write(data)
|
||||
if clientErr != nil {
|
||||
return fmt.Errorf("cannot write data to client: %w", clientErr)
|
||||
}
|
||||
if n != len(data) {
|
||||
logger.Panicf("BUG: unexpected number of bytes written returned by client.Write; got %d; want %d", n, len(data))
|
||||
}
|
||||
// Flush the read data from the backend to the client as fast as possible
|
||||
// in order to reduce delays for data propagation.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaLogs/issues/667
|
||||
flusher.Flush()
|
||||
}
|
||||
if backendErr != nil {
|
||||
if backendErr == io.EOF {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("cannot read data from backend: %w", backendErr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var copyBufPool bytesutil.ByteBufferPool
|
||||
|
||||
func copyHeader(dst, src http.Header) {
|
||||
|
||||
@@ -514,6 +514,11 @@ func (w *fakeResponseWriter) getResponse() string {
|
||||
return w.bb.String()
|
||||
}
|
||||
|
||||
// Flush implements net/http.Flusher.
//
// It is intentionally a no-op: the method only exists so that fakeResponseWriter
// satisfies the http.Flusher interface.
// NOTE(review): presumably required because copyStreamToClient rejects clients
// which do not implement http.Flusher - confirm against the callers in tests.
|
||||
func (w *fakeResponseWriter) Flush() {
|
||||
// Nothing to do.
|
||||
}
|
||||
|
||||
func (w *fakeResponseWriter) Header() http.Header {
|
||||
if w.h == nil {
|
||||
w.h = http.Header{}
|
||||
|
||||
@@ -115,7 +115,7 @@ func main() {
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot create backup: %s", err)
|
||||
}
|
||||
pushmetrics.Stop()
|
||||
pushmetrics.StopAndPush()
|
||||
|
||||
startTime := time.Now()
|
||||
logger.Infof("gracefully shutting down http server for metrics at %q", listenAddrs)
|
||||
|
||||
@@ -68,7 +68,7 @@ func main() {
|
||||
if err := a.Run(ctx); err != nil {
|
||||
logger.Fatalf("cannot restore from backup: %s", err)
|
||||
}
|
||||
pushmetrics.Stop()
|
||||
pushmetrics.StopAndPush()
|
||||
srcFS.MustStop()
|
||||
dstFS.MustStop()
|
||||
|
||||
|
||||
@@ -197,13 +197,13 @@ func newNextSeriesForSearchQuery(ec *evalConfig, sq *storage.SearchQuery, expr g
|
||||
}
|
||||
s.summarize(aggrAvg, ec.startTime, ec.endTime, ec.storageStep, 0)
|
||||
t := timerpool.Get(30 * time.Second)
|
||||
defer timerpool.Put(t)
|
||||
select {
|
||||
case seriesCh <- s:
|
||||
case <-t.C:
|
||||
logger.Errorf("resource leak when processing the %s (full query: %s); please report this error to VictoriaMetrics developers",
|
||||
expr.AppendString(nil), ec.originalQuery)
|
||||
}
|
||||
timerpool.Put(t)
|
||||
return nil
|
||||
})
|
||||
close(seriesCh)
|
||||
|
||||
@@ -1150,15 +1150,23 @@ func evalInstantRollup(qt *querytracer.Tracer, ec *EvalConfig, funcName string,
|
||||
}
|
||||
qt.Printf("optimized calculation for instant rollup avg_over_time(m[d]) as (sum_over_time(m[d]) / count_over_time(m[d]))")
|
||||
fe := expr.(*metricsql.FuncExpr)
|
||||
feSum := *fe
|
||||
feSum.Name = "sum_over_time"
|
||||
feCount := *fe
|
||||
feCount.Name = "count_over_time"
|
||||
// copy RollupExpr to drop possible offset,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9762
|
||||
newArg := copyRollupExpr(fe.Args[0].(*metricsql.RollupExpr))
|
||||
newArg.Offset = nil
|
||||
be := &metricsql.BinaryOpExpr{
|
||||
Op: "/",
|
||||
KeepMetricNames: fe.KeepMetricNames,
|
||||
Left: &feSum,
|
||||
Right: &feCount,
|
||||
Left: &metricsql.FuncExpr{
|
||||
Name: "sum_over_time",
|
||||
Args: []metricsql.Expr{newArg},
|
||||
KeepMetricNames: fe.KeepMetricNames,
|
||||
},
|
||||
Right: &metricsql.FuncExpr{
|
||||
Name: "count_over_time",
|
||||
Args: []metricsql.Expr{newArg},
|
||||
KeepMetricNames: fe.KeepMetricNames,
|
||||
},
|
||||
}
|
||||
return evalExpr(qt, ec, be)
|
||||
case "rate":
|
||||
@@ -1172,8 +1180,12 @@ func evalInstantRollup(qt *querytracer.Tracer, ec *EvalConfig, funcName string,
|
||||
fe := afe.Args[0].(*metricsql.FuncExpr)
|
||||
feIncrease := *fe
|
||||
feIncrease.Name = "increase"
|
||||
re := fe.Args[0].(*metricsql.RollupExpr)
|
||||
d := re.Window.Duration(ec.Step)
|
||||
// copy RollupExpr to drop possible offset,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9762
|
||||
newArg := copyRollupExpr(fe.Args[0].(*metricsql.RollupExpr))
|
||||
newArg.Offset = nil
|
||||
feIncrease.Args = []metricsql.Expr{newArg}
|
||||
d := newArg.Window.Duration(ec.Step)
|
||||
if d == 0 {
|
||||
d = ec.Step
|
||||
}
|
||||
@@ -1193,8 +1205,12 @@ func evalInstantRollup(qt *querytracer.Tracer, ec *EvalConfig, funcName string,
|
||||
fe := expr.(*metricsql.FuncExpr)
|
||||
feIncrease := *fe
|
||||
feIncrease.Name = "increase"
|
||||
re := fe.Args[0].(*metricsql.RollupExpr)
|
||||
d := re.Window.Duration(ec.Step)
|
||||
// copy RollupExpr to drop possible offset,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9762
|
||||
newArg := copyRollupExpr(fe.Args[0].(*metricsql.RollupExpr))
|
||||
newArg.Offset = nil
|
||||
feIncrease.Args = []metricsql.Expr{newArg}
|
||||
d := newArg.Window.Duration(ec.Step)
|
||||
if d == 0 {
|
||||
d = ec.Step
|
||||
}
|
||||
@@ -1999,3 +2015,23 @@ func dropStaleNaNs(funcName string, values []float64, timestamps []int64) ([]flo
|
||||
}
|
||||
return dstValues, dstTimestamps
|
||||
}
|
||||
|
||||
func copyRollupExpr(re *metricsql.RollupExpr) *metricsql.RollupExpr {
|
||||
var newRe metricsql.RollupExpr
|
||||
newRe.Expr = re.Expr
|
||||
newRe.InheritStep = re.InheritStep
|
||||
newRe.At = re.At
|
||||
if re.Window != nil {
|
||||
newRe.Window = &metricsql.DurationExpr{}
|
||||
*newRe.Window = *re.Window
|
||||
}
|
||||
if re.Offset != nil {
|
||||
newRe.Offset = &metricsql.DurationExpr{}
|
||||
*newRe.Offset = *re.Offset
|
||||
}
|
||||
if re.Step != nil {
|
||||
newRe.Step = &metricsql.DurationExpr{}
|
||||
*newRe.Step = *re.Step
|
||||
}
|
||||
return &newRe
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
FROM golang:1.25.0 AS build-web-stage
|
||||
FROM golang:1.25.1 AS build-web-stage
|
||||
COPY build /build
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
<link rel="apple-touch-icon" href="/favicon.svg"/>
|
||||
<link rel="mask-icon" href="/favicon.svg" color="#000000">
|
||||
|
||||
<meta name="robots" content="noindex">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=5"/>
|
||||
<meta name="theme-color" content="#000000"/>
|
||||
<meta name="description" content="Explore and troubleshoot your VictoriaMetrics data"/>
|
||||
|
||||
56
app/vmui/packages/vmui/package-lock.json
generated
56
app/vmui/packages/vmui/package-lock.json
generated
@@ -17,7 +17,7 @@
|
||||
"react-input-mask": "^2.0.4",
|
||||
"react-router-dom": "^7.6.3",
|
||||
"uplot": "^1.6.32",
|
||||
"vite": "^7.0.4",
|
||||
"vite": "^7.1.5",
|
||||
"web-vitals": "^5.0.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
@@ -7321,13 +7321,13 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/tinyglobby": {
|
||||
"version": "0.2.14",
|
||||
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.14.tgz",
|
||||
"integrity": "sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ==",
|
||||
"version": "0.2.15",
|
||||
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz",
|
||||
"integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"fdir": "^6.4.4",
|
||||
"picomatch": "^4.0.2"
|
||||
"fdir": "^6.5.0",
|
||||
"picomatch": "^4.0.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12.0.0"
|
||||
@@ -7337,10 +7337,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/tinyglobby/node_modules/fdir": {
|
||||
"version": "6.4.6",
|
||||
"resolved": "https://registry.npmjs.org/fdir/-/fdir-6.4.6.tgz",
|
||||
"integrity": "sha512-hiFoqpyZcfNm1yc4u8oWCf9A2c4D3QjCrks3zmoVKVxpQRzmPNar1hUJcBG2RQHvEVGDN+Jm81ZheVLAQMK6+w==",
|
||||
"version": "6.5.0",
|
||||
"resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz",
|
||||
"integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=12.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"picomatch": "^3 || ^4"
|
||||
},
|
||||
@@ -7351,9 +7354,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/tinyglobby/node_modules/picomatch": {
|
||||
"version": "4.0.2",
|
||||
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.2.tgz",
|
||||
"integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==",
|
||||
"version": "4.0.3",
|
||||
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
|
||||
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
@@ -7657,17 +7660,17 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/vite": {
|
||||
"version": "7.0.4",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-7.0.4.tgz",
|
||||
"integrity": "sha512-SkaSguuS7nnmV7mfJ8l81JGBFV7Gvzp8IzgE8A8t23+AxuNX61Q5H1Tpz5efduSN7NHC8nQXD3sKQKZAu5mNEA==",
|
||||
"version": "7.1.5",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-7.1.5.tgz",
|
||||
"integrity": "sha512-4cKBO9wR75r0BeIWWWId9XK9Lj6La5X846Zw9dFfzMRw38IlTk2iCcUt6hsyiDRcPidc55ZParFYDXi0nXOeLQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"esbuild": "^0.25.0",
|
||||
"fdir": "^6.4.6",
|
||||
"picomatch": "^4.0.2",
|
||||
"fdir": "^6.5.0",
|
||||
"picomatch": "^4.0.3",
|
||||
"postcss": "^8.5.6",
|
||||
"rollup": "^4.40.0",
|
||||
"tinyglobby": "^0.2.14"
|
||||
"rollup": "^4.43.0",
|
||||
"tinyglobby": "^0.2.15"
|
||||
},
|
||||
"bin": {
|
||||
"vite": "bin/vite.js"
|
||||
@@ -7772,10 +7775,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/vite/node_modules/fdir": {
|
||||
"version": "6.4.6",
|
||||
"resolved": "https://registry.npmjs.org/fdir/-/fdir-6.4.6.tgz",
|
||||
"integrity": "sha512-hiFoqpyZcfNm1yc4u8oWCf9A2c4D3QjCrks3zmoVKVxpQRzmPNar1hUJcBG2RQHvEVGDN+Jm81ZheVLAQMK6+w==",
|
||||
"version": "6.5.0",
|
||||
"resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz",
|
||||
"integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=12.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"picomatch": "^3 || ^4"
|
||||
},
|
||||
@@ -7786,9 +7792,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/vite/node_modules/picomatch": {
|
||||
"version": "4.0.2",
|
||||
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.2.tgz",
|
||||
"integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==",
|
||||
"version": "4.0.3",
|
||||
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
|
||||
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
"react-input-mask": "^2.0.4",
|
||||
"react-router-dom": "^7.6.3",
|
||||
"uplot": "^1.6.32",
|
||||
"vite": "^7.0.4",
|
||||
"vite": "^7.1.5",
|
||||
"web-vitals": "^5.0.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
||||
@@ -13,6 +13,8 @@ import Button from "../../Button/Button";
|
||||
interface DatePickerProps {
|
||||
date: Date | Dayjs
|
||||
format?: string
|
||||
minDate?: Date | Dayjs
|
||||
maxDate?: Date | Dayjs
|
||||
onChange: (date: string) => void
|
||||
}
|
||||
|
||||
@@ -24,6 +26,8 @@ enum CalendarTypeView {
|
||||
|
||||
const Calendar: FC<DatePickerProps> = ({
|
||||
date,
|
||||
minDate,
|
||||
maxDate,
|
||||
format = DATE_TIME_FORMAT,
|
||||
onChange,
|
||||
}) => {
|
||||
@@ -34,6 +38,8 @@ const Calendar: FC<DatePickerProps> = ({
|
||||
const today = dayjs.tz();
|
||||
const viewDateIsToday = today.format(DATE_FORMAT) === viewDate.format(DATE_FORMAT);
|
||||
const { isMobile } = useDeviceDetect();
|
||||
const min = minDate ? dayjs(minDate) : undefined;
|
||||
const max = maxDate ? dayjs(maxDate) : undefined;
|
||||
|
||||
const toggleDisplayYears = () => {
|
||||
setViewType(prev => prev === CalendarTypeView.years ? CalendarTypeView.days : CalendarTypeView.years);
|
||||
@@ -75,9 +81,13 @@ const Calendar: FC<DatePickerProps> = ({
|
||||
onChangeViewDate={handleChangeViewDate}
|
||||
toggleDisplayYears={toggleDisplayYears}
|
||||
showArrowNav={viewType === CalendarTypeView.days}
|
||||
hasPrev={viewType === CalendarTypeView.days && (!min || viewDate.startOf("month").isAfter(min))}
|
||||
hasNext={viewType === CalendarTypeView.days && (!max || viewDate.endOf("month").isBefore(max))}
|
||||
/>
|
||||
{viewType === CalendarTypeView.days && (
|
||||
<CalendarBody
|
||||
minDate={min}
|
||||
maxDate={max}
|
||||
viewDate={viewDate}
|
||||
selectDate={selectDate}
|
||||
onChangeSelectDate={handleChangeSelectDate}
|
||||
@@ -85,12 +95,16 @@ const Calendar: FC<DatePickerProps> = ({
|
||||
)}
|
||||
{viewType === CalendarTypeView.years && (
|
||||
<YearsList
|
||||
minDate={min}
|
||||
maxDate={max}
|
||||
viewDate={viewDate}
|
||||
onChangeViewDate={handleChangeViewDate}
|
||||
/>
|
||||
)}
|
||||
{viewType === CalendarTypeView.months && (
|
||||
<MonthsList
|
||||
minDate={min}
|
||||
maxDate={max}
|
||||
selectDate={selectDate}
|
||||
viewDate={viewDate}
|
||||
onChangeViewDate={handleChangeViewDate}
|
||||
|
||||
@@ -4,6 +4,8 @@ import classNames from "classnames";
|
||||
import Tooltip from "../../../Tooltip/Tooltip";
|
||||
|
||||
interface CalendarBodyProps {
|
||||
minDate?: Dayjs
|
||||
maxDate?: Dayjs
|
||||
viewDate: Dayjs
|
||||
selectDate: Dayjs
|
||||
onChangeSelectDate: (date: Dayjs) => void
|
||||
@@ -11,7 +13,7 @@ interface CalendarBodyProps {
|
||||
|
||||
const weekday = ["Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"];
|
||||
|
||||
const CalendarBody: FC<CalendarBodyProps> = ({ viewDate: date, selectDate, onChangeSelectDate }) => {
|
||||
const CalendarBody: FC<CalendarBodyProps> = ({ minDate, maxDate, viewDate: date, selectDate, onChangeSelectDate }) => {
|
||||
const format = "YYYY-MM-DD";
|
||||
const today = dayjs.tz();
|
||||
const viewDate = dayjs(date.format(format));
|
||||
@@ -44,21 +46,25 @@ const CalendarBody: FC<CalendarBodyProps> = ({ viewDate: date, selectDate, onCha
|
||||
</Tooltip>
|
||||
))}
|
||||
|
||||
{days.map((d, i) => (
|
||||
<div
|
||||
className={classNames({
|
||||
"vm-calendar-body-cell": true,
|
||||
"vm-calendar-body-cell_day": true,
|
||||
"vm-calendar-body-cell_day_empty": !d,
|
||||
"vm-calendar-body-cell_day_active": (d && d.format(format)) === selectDate.format(format),
|
||||
"vm-calendar-body-cell_day_today": (d && d.format(format)) === today.format(format)
|
||||
})}
|
||||
key={d ? d.format(format) : i}
|
||||
onClick={createHandlerSelectDate(d)}
|
||||
>
|
||||
{d && d.format("D")}
|
||||
</div>
|
||||
))}
|
||||
{days.map((d, i) => {
|
||||
const isDisabled = d && ((minDate && d.isBefore(minDate)) || (maxDate && d.isAfter(maxDate)));
|
||||
return (
|
||||
<div
|
||||
className={classNames({
|
||||
"vm-calendar-body-cell": true,
|
||||
"vm-calendar-body-cell_day": true,
|
||||
"vm-calendar-body-cell_day_empty": !d,
|
||||
"vm-calendar-body-cell_day_active": (d && d.format(format)) === selectDate.format(format),
|
||||
"vm-calendar-body-cell_day_today": (d && d.format(format)) === today.format(format),
|
||||
"vm-calendar-body-cell_day_disabled": isDisabled,
|
||||
})}
|
||||
key={d ? d.format(format) : i}
|
||||
onClick={createHandlerSelectDate(d)}
|
||||
>
|
||||
{d && d.format("D")}
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -1,15 +1,18 @@
|
||||
import { FC } from "preact/compat";
|
||||
import { Dayjs } from "dayjs";
|
||||
import { ArrowDownIcon, ArrowDropDownIcon } from "../../../Icons";
|
||||
import classNames from "classnames";
|
||||
|
||||
interface CalendarHeaderProps {
|
||||
viewDate: Dayjs
|
||||
onChangeViewDate: (date: Dayjs) => void
|
||||
showArrowNav: boolean
|
||||
toggleDisplayYears: () => void
|
||||
hasNext: boolean
|
||||
hasPrev: boolean
|
||||
}
|
||||
|
||||
const CalendarHeader: FC<CalendarHeaderProps> = ({ viewDate, showArrowNav, onChangeViewDate, toggleDisplayYears }) => {
|
||||
const CalendarHeader: FC<CalendarHeaderProps> = ({ hasPrev, hasNext, viewDate, showArrowNav, onChangeViewDate, toggleDisplayYears }) => {
|
||||
|
||||
const setPrevMonth = () => {
|
||||
onChangeViewDate(viewDate.subtract(1, "month"));
|
||||
@@ -35,14 +38,20 @@ const CalendarHeader: FC<CalendarHeaderProps> = ({ viewDate, showArrowNav, onCha
|
||||
{showArrowNav && (
|
||||
<div className="vm-calendar-header-right">
|
||||
<div
|
||||
className="vm-calendar-header-right__prev"
|
||||
onClick={setPrevMonth}
|
||||
className={classNames({
|
||||
"vm-calendar-header-right__prev": true,
|
||||
"vm-calendar-header-right_disabled": !hasPrev,
|
||||
})}
|
||||
onClick={hasPrev ? setPrevMonth : undefined}
|
||||
>
|
||||
<ArrowDownIcon/>
|
||||
</div>
|
||||
<div
|
||||
className="vm-calendar-header-right__next"
|
||||
onClick={setNextMonth}
|
||||
className={classNames({
|
||||
"vm-calendar-header-right__next": true,
|
||||
"vm-calendar-header-right_disabled": !hasNext,
|
||||
})}
|
||||
onClick={hasNext ? setNextMonth : undefined}
|
||||
>
|
||||
<ArrowDownIcon/>
|
||||
</div>
|
||||
|
||||
@@ -3,13 +3,14 @@ import dayjs, { Dayjs } from "dayjs";
|
||||
import classNames from "classnames";
|
||||
|
||||
interface CalendarMonthsProps {
|
||||
minDate?: Dayjs
|
||||
maxDate?: Dayjs
|
||||
viewDate: Dayjs,
|
||||
selectDate: Dayjs
|
||||
|
||||
onChangeViewDate: (date: Dayjs) => void
|
||||
}
|
||||
|
||||
const MonthsList: FC<CalendarMonthsProps> = ({ viewDate, selectDate, onChangeViewDate }) => {
|
||||
const MonthsList: FC<CalendarMonthsProps> = ({ minDate, maxDate, viewDate, selectDate, onChangeViewDate }) => {
|
||||
|
||||
const today = dayjs().format("MM");
|
||||
const currentMonths = useMemo(() => selectDate.format("MM"), [selectDate]);
|
||||
@@ -29,20 +30,24 @@ const MonthsList: FC<CalendarMonthsProps> = ({ viewDate, selectDate, onChangeVie
|
||||
|
||||
return (
|
||||
<div className="vm-calendar-years">
|
||||
{months.map(m => (
|
||||
<div
|
||||
className={classNames({
|
||||
"vm-calendar-years__year": true,
|
||||
"vm-calendar-years__year_selected": m.format("MM") === currentMonths,
|
||||
"vm-calendar-years__year_today": m.format("MM") === today
|
||||
})}
|
||||
id={`vm-calendar-year-${m.format("MM")}`}
|
||||
key={m.format("MM")}
|
||||
onClick={createHandlerClick(m)}
|
||||
>
|
||||
{m.format("MMMM")}
|
||||
</div>
|
||||
))}
|
||||
{months.map(m => {
|
||||
const isDisabled = m && ((minDate && m.isBefore(minDate)) || (maxDate && m.isAfter(maxDate)));
|
||||
return (
|
||||
<div
|
||||
className={classNames({
|
||||
"vm-calendar-years__year": true,
|
||||
"vm-calendar-years__year_selected": m.format("MM") === currentMonths,
|
||||
"vm-calendar-years__year_today": m.format("MM") === today,
|
||||
"vm-calendar-years__year_disabled": isDisabled,
|
||||
})}
|
||||
id={`vm-calendar-year-${m.format("MM")}`}
|
||||
key={m.format("MM")}
|
||||
onClick={isDisabled ? undefined : createHandlerClick(m)}
|
||||
>
|
||||
{m.format("MMMM")}
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -3,11 +3,13 @@ import dayjs, { Dayjs } from "dayjs";
|
||||
import classNames from "classnames";
|
||||
|
||||
interface CalendarYearsProps {
|
||||
minDate?: Dayjs
|
||||
maxDate?: Dayjs
|
||||
viewDate: Dayjs
|
||||
onChangeViewDate: (date: Dayjs) => void
|
||||
}
|
||||
|
||||
const YearsList: FC<CalendarYearsProps> = ({ viewDate, onChangeViewDate }) => {
|
||||
const YearsList: FC<CalendarYearsProps> = ({ minDate, maxDate, viewDate, onChangeViewDate }) => {
|
||||
|
||||
const today = dayjs().format("YYYY");
|
||||
const currentYear = useMemo(() => viewDate.format("YYYY"), [viewDate]);
|
||||
@@ -30,20 +32,24 @@ const YearsList: FC<CalendarYearsProps> = ({ viewDate, onChangeViewDate }) => {
|
||||
|
||||
return (
|
||||
<div className="vm-calendar-years">
|
||||
{years.map(y => (
|
||||
<div
|
||||
className={classNames({
|
||||
"vm-calendar-years__year": true,
|
||||
"vm-calendar-years__year_selected": y.format("YYYY") === currentYear,
|
||||
"vm-calendar-years__year_today": y.format("YYYY") === today
|
||||
})}
|
||||
id={`vm-calendar-year-${y.format("YYYY")}`}
|
||||
key={y.format("YYYY")}
|
||||
onClick={createHandlerClick(y)}
|
||||
>
|
||||
{y.format("YYYY")}
|
||||
</div>
|
||||
))}
|
||||
{years.map(y => {
|
||||
const isDisabled = y && (minDate && y.isBefore(minDate)) || (maxDate && y.isAfter(maxDate));
|
||||
return (
|
||||
<div
|
||||
className={classNames({
|
||||
"vm-calendar-years__year": true,
|
||||
"vm-calendar-years__year_selected": y.format("YYYY") === currentYear,
|
||||
"vm-calendar-years__year_today": y.format("YYYY") === today,
|
||||
"vm-calendar-years__year_disabled": isDisabled,
|
||||
})}
|
||||
id={`vm-calendar-year-${y.format("YYYY")}`}
|
||||
key={y.format("YYYY")}
|
||||
onClick={isDisabled ? undefined : createHandlerClick(y)}
|
||||
>
|
||||
{y.format("YYYY")}
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -69,6 +69,10 @@
|
||||
}
|
||||
}
|
||||
|
||||
&_disabled {
|
||||
color: $color-text-disabled;
|
||||
}
|
||||
|
||||
&__prev {
|
||||
transform: rotate(90deg);
|
||||
}
|
||||
@@ -108,7 +112,12 @@
|
||||
cursor: pointer;
|
||||
transition: color 200ms ease, background-color 300ms ease-in-out;
|
||||
|
||||
&:hover {
|
||||
&_disabled {
|
||||
cursor: unset;
|
||||
color: $color-text-disabled;
|
||||
}
|
||||
|
||||
&:not(&_disabled):hover {
|
||||
background-color: $color-hover-black;
|
||||
}
|
||||
|
||||
@@ -148,7 +157,12 @@
|
||||
cursor: pointer;
|
||||
transition: color 200ms ease, background-color 300ms ease-in-out;
|
||||
|
||||
&:hover {
|
||||
&_disabled {
|
||||
cursor: unset;
|
||||
color: $color-text-disabled;
|
||||
}
|
||||
|
||||
&:not(&_disabled):hover {
|
||||
background-color: $color-hover-black;
|
||||
}
|
||||
|
||||
|
||||
@@ -10,8 +10,10 @@ import useEventListener from "../../../hooks/useEventListener";
|
||||
interface DatePickerProps {
|
||||
date: string | Date | Dayjs,
|
||||
targetRef: React.RefObject<HTMLElement>;
|
||||
format?: string
|
||||
label?: string
|
||||
format?: string;
|
||||
label?: string;
|
||||
minDate?: Date | Dayjs;
|
||||
maxDate?: Date | Dayjs;
|
||||
onChange: (val: string) => void
|
||||
}
|
||||
|
||||
@@ -20,7 +22,9 @@ const DatePicker = forwardRef<HTMLDivElement, DatePickerProps>(({
|
||||
targetRef,
|
||||
format = DATE_TIME_FORMAT,
|
||||
onChange,
|
||||
label
|
||||
label,
|
||||
minDate,
|
||||
maxDate
|
||||
}, ref) => {
|
||||
const dateDayjs = useMemo(() => dayjs(date).isValid() ? dayjs.tz(date) : dayjs().tz(), [date]);
|
||||
const { isMobile } = useDeviceDetect();
|
||||
@@ -56,6 +60,8 @@ const DatePicker = forwardRef<HTMLDivElement, DatePickerProps>(({
|
||||
date={dateDayjs}
|
||||
format={format}
|
||||
onChange={handleChangeDate}
|
||||
minDate={minDate}
|
||||
maxDate={maxDate}
|
||||
/>
|
||||
</div>
|
||||
</Popper>
|
||||
|
||||
@@ -3,39 +3,52 @@ import { ChangeEvent, KeyboardEvent } from "react";
|
||||
import { CalendarIcon } from "../../Icons";
|
||||
import DatePicker from "../DatePicker";
|
||||
import Button from "../../Button/Button";
|
||||
import { DATE_TIME_FORMAT } from "../../../../constants/date";
|
||||
import { DATE_ISO_FORMAT, DATE_FORMAT, DATE_TIME_FORMAT } from "../../../../constants/date";
|
||||
import InputMask from "react-input-mask";
|
||||
import dayjs from "dayjs";
|
||||
import dayjs, { Dayjs } from "dayjs";
|
||||
import classNames from "classnames";
|
||||
import "./style.scss";
|
||||
|
||||
const formatStringDate = (val: string) => {
|
||||
return dayjs(val).isValid() ? dayjs.tz(val).format(DATE_TIME_FORMAT) : val;
|
||||
const formatStringDate = (val: string, format: string) => {
|
||||
return dayjs(val).isValid() ? dayjs.tz(val).format(format) : val;
|
||||
};
|
||||
|
||||
interface DateTimeInputProps {
|
||||
value?: string;
|
||||
label: string;
|
||||
pickerLabel: string;
|
||||
dateOnly?: boolean;
|
||||
format?: string;
|
||||
pickerRef: React.RefObject<HTMLDivElement>;
|
||||
onChange: (date: string) => void;
|
||||
onEnter: () => void;
|
||||
disabled?: boolean;
|
||||
minDate?: Date | Dayjs;
|
||||
maxDate?: Date | Dayjs;
|
||||
}
|
||||
|
||||
// masks maps a supported date format constant to the input mask pattern
// used for that format. DateTimeInput looks the pattern up via `masks[format]`
// and passes it to the `mask` prop of InputMask.
// NOTE(review): a `format` value missing from this table yields `undefined` - confirm
// that callers only pass one of the three formats listed here.
const masks: Record<string, string> = {
|
||||
[DATE_ISO_FORMAT]: "9999-99-99T99:99:99",
|
||||
[DATE_FORMAT]: "9999-99-99",
|
||||
[DATE_TIME_FORMAT]: "9999-99-99 99:99:99"
|
||||
};
|
||||
|
||||
const DateTimeInput: FC<DateTimeInputProps> = ({
|
||||
value = "",
|
||||
dateOnly = false,
|
||||
format = DATE_TIME_FORMAT,
|
||||
minDate,
|
||||
maxDate,
|
||||
label,
|
||||
pickerLabel,
|
||||
pickerRef,
|
||||
onChange,
|
||||
onEnter
|
||||
onEnter,
|
||||
disabled
|
||||
}) => {
|
||||
const wrapperRef = useRef<HTMLDivElement>(null);
|
||||
const [inputRef, setInputRef] = useState<HTMLInputElement | null>(null);
|
||||
const mask = masks[format];
|
||||
|
||||
const [maskedValue, setMaskedValue] = useState(formatStringDate(value));
|
||||
const [maskedValue, setMaskedValue] = useState(formatStringDate(value, format));
|
||||
const [focusToTime, setFocusToTime] = useState(false);
|
||||
const [awaitChangeForEnter, setAwaitChangeForEnter] = useState(false);
|
||||
const error = dayjs(maskedValue).isValid() ? "" : "Invalid date format";
|
||||
@@ -55,16 +68,13 @@ const DateTimeInput: FC<DateTimeInputProps> = ({
|
||||
}
|
||||
};
|
||||
|
||||
const mask = dateOnly ? "9999-99-99" : "9999-99-99 99:99:99";
|
||||
const placeholder = dateOnly ? "YYYY-MM-DD" : "YYYY-MM-DD HH:mm:ss";
|
||||
|
||||
const handleChangeDate = (val: string) => {
|
||||
setMaskedValue(val);
|
||||
setFocusToTime(true);
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
const newValue = formatStringDate(value);
|
||||
const newValue = formatStringDate(value, format);
|
||||
if (newValue !== maskedValue) {
|
||||
setMaskedValue(newValue);
|
||||
}
|
||||
@@ -87,7 +97,8 @@ const DateTimeInput: FC<DateTimeInputProps> = ({
|
||||
<div
|
||||
className={classNames({
|
||||
"vm-date-time-input": true,
|
||||
"vm-date-time-input_error": error
|
||||
"vm-date-time-input_error": error,
|
||||
"vm-date-time-input_disabled": disabled,
|
||||
})}
|
||||
>
|
||||
<label>{label}</label>
|
||||
@@ -95,7 +106,7 @@ const DateTimeInput: FC<DateTimeInputProps> = ({
|
||||
tabIndex={1}
|
||||
inputRef={setInputRef}
|
||||
mask={mask}
|
||||
placeholder={placeholder}
|
||||
placeholder={format}
|
||||
value={maskedValue}
|
||||
autoCapitalize={"none"}
|
||||
inputMode={"numeric"}
|
||||
@@ -103,6 +114,7 @@ const DateTimeInput: FC<DateTimeInputProps> = ({
|
||||
onChange={handleMaskedChange}
|
||||
onBlur={handleBlur}
|
||||
onKeyUp={handleKeyUp}
|
||||
disabled={disabled}
|
||||
/>
|
||||
{error && (
|
||||
<span className="vm-date-time-input__error-text">{error}</span>
|
||||
@@ -117,6 +129,7 @@ const DateTimeInput: FC<DateTimeInputProps> = ({
|
||||
size="small"
|
||||
startIcon={<CalendarIcon/>}
|
||||
ariaLabel="calendar"
|
||||
disabled={disabled}
|
||||
/>
|
||||
</div>
|
||||
<DatePicker
|
||||
@@ -125,6 +138,9 @@ const DateTimeInput: FC<DateTimeInputProps> = ({
|
||||
date={maskedValue}
|
||||
onChange={handleChangeDate}
|
||||
targetRef={wrapperRef}
|
||||
minDate={minDate}
|
||||
maxDate={maxDate}
|
||||
format={format}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
|
||||
@@ -23,6 +23,14 @@
|
||||
user-select: none;
|
||||
}
|
||||
|
||||
&_disabled {
|
||||
cursor: default;
|
||||
pointer-events: none;
|
||||
* {
|
||||
color: $color-text-disabled !important;
|
||||
}
|
||||
}
|
||||
|
||||
&__icon {
|
||||
position: absolute;
|
||||
bottom: 2px;
|
||||
|
||||
@@ -46,11 +46,12 @@ const Select: FC<SelectProps> = ({
|
||||
const autocompleteAnchorEl = useRef<HTMLDivElement>(null);
|
||||
const [wrapperRef, setWrapperRef] = useState<React.RefObject<HTMLElement> | null>(null);
|
||||
const [openList, setOpenList] = useState(false);
|
||||
const resultList = [...list];
|
||||
|
||||
const inputRef = useRef<HTMLInputElement>(null);
|
||||
|
||||
const isMultiple = Array.isArray(value);
|
||||
const selectedValues = Array.isArray(value) ? value.slice() : [];
|
||||
let selectedValues = Array.isArray(value) ? value.slice() : [];
|
||||
const hideInput = isMobile && isMultiple && !!selectedValues?.length;
|
||||
|
||||
const textFieldValue = useMemo(() => {
|
||||
@@ -77,7 +78,7 @@ const Select: FC<SelectProps> = ({
|
||||
};
|
||||
|
||||
const handleBlur = () => {
|
||||
list.includes(search) && onChange(search);
|
||||
resultList.includes(search) && onChange(search);
|
||||
};
|
||||
|
||||
const handleToggleList = (e: MouseEvent<HTMLDivElement>) => {
|
||||
@@ -123,8 +124,10 @@ const Select: FC<SelectProps> = ({
|
||||
useEventListener("keyup", handleKeyUp);
|
||||
useClickOutside(autocompleteAnchorEl, handleCloseList, wrapperRef);
|
||||
|
||||
includeAll && !list.includes("All") && list.push("All");
|
||||
includeAll && !selectedValues?.length && selectedValues.push("All");
|
||||
if (includeAll && !resultList.includes("All")) resultList.push("All");
|
||||
if (includeAll && (!selectedValues?.length || selectedValues?.length === resultList?.length)) {
|
||||
selectedValues = ["All"];
|
||||
}
|
||||
|
||||
return (
|
||||
<div
|
||||
@@ -155,6 +158,7 @@ const Select: FC<SelectProps> = ({
|
||||
onInput={handleChange}
|
||||
onFocus={handleFocus}
|
||||
onBlur={handleBlur}
|
||||
disabled={disabled}
|
||||
ref={inputRef}
|
||||
readOnly={isMobile || !searchable}
|
||||
/>
|
||||
@@ -182,7 +186,7 @@ const Select: FC<SelectProps> = ({
|
||||
itemClassName={itemClassName}
|
||||
label={label}
|
||||
value={autocompleteValue}
|
||||
options={list.map(l => ({ value: l }))}
|
||||
options={resultList.map(l => ({ value: l }))}
|
||||
anchor={autocompleteAnchorEl}
|
||||
selected={selectedValues}
|
||||
minLength={1}
|
||||
|
||||
@@ -128,8 +128,14 @@
|
||||
}
|
||||
|
||||
&_disabled {
|
||||
pointer-events: none;
|
||||
* {
|
||||
cursor: not-allowed;
|
||||
color: var(--color-text-disabled);
|
||||
cursor: default;
|
||||
}
|
||||
|
||||
input::placeholder {
|
||||
color: var(--color-text-disabled);
|
||||
}
|
||||
|
||||
.vm-select-input {
|
||||
|
||||
@@ -72,7 +72,6 @@ const TextField: FC<TextFieldProps> = ({
|
||||
"vm-text-field__input_error": error,
|
||||
"vm-text-field__input_warning": !error && warning,
|
||||
"vm-text-field__input_icon-start": startIcon,
|
||||
"vm-text-field__input_disabled": disabled,
|
||||
"vm-text-field__input_textarea": type === "textarea",
|
||||
});
|
||||
|
||||
@@ -136,7 +135,8 @@ const TextField: FC<TextFieldProps> = ({
|
||||
className={classNames({
|
||||
"vm-text-field": true,
|
||||
"vm-text-field_textarea": type === "textarea",
|
||||
"vm-text-field_dark": isDarkTheme
|
||||
"vm-text-field_dark": isDarkTheme,
|
||||
"vm-text-field_disabled": disabled
|
||||
})}
|
||||
data-replicated-value={value}
|
||||
>
|
||||
|
||||
@@ -6,6 +6,15 @@
|
||||
margin: 6px 0;
|
||||
width: 100%;
|
||||
|
||||
&_disabled {
|
||||
color: $color-text-disabled;
|
||||
pointer-events: none;
|
||||
}
|
||||
|
||||
&:is(&_disabled) > &__label {
|
||||
color: $color-text-disabled;
|
||||
}
|
||||
|
||||
&_textarea:after {
|
||||
content: attr(data-replicated-value) " ";
|
||||
white-space: pre-wrap;
|
||||
|
||||
@@ -155,15 +155,18 @@ const ExploreRules: FC = () => {
|
||||
[groups, types, states, searchInput]
|
||||
);
|
||||
|
||||
const selectedTypes = allTypes.size === types.length ? [] : types;
|
||||
const selectedStates = allStates.size === states.length ? [] : states;
|
||||
|
||||
return (
|
||||
<>
|
||||
{modalOpen && getModal()}
|
||||
{(!modalOpen || !!allStates?.size) && (
|
||||
<div className="vm-explore-alerts">
|
||||
<RulesHeader
|
||||
types={types}
|
||||
types={selectedTypes}
|
||||
allTypes={Array.from(allTypes)}
|
||||
states={states}
|
||||
states={selectedStates}
|
||||
allStates={Array.from(allStates)}
|
||||
onChangeTypes={handleChangeTypes}
|
||||
onChangeStates={handleChangeStates}
|
||||
|
||||
@@ -1,11 +1,8 @@
|
||||
package apptest
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"net/url"
|
||||
"slices"
|
||||
@@ -503,44 +500,3 @@ func sortTSDBStatusResponseEntries(entries []TSDBStatusResponseEntry) {
|
||||
return left.Count < right.Count
|
||||
})
|
||||
}
|
||||
|
||||
// LogsQLQueryResponse is an in-memory representation of the
// /select/logsql/query response.
//
// LogLines holds one normalized JSON document per line of the response.
type LogsQLQueryResponse struct {
	LogLines []string
}

// NewLogsQLQueryResponse is a test helper that builds a LogsQLQueryResponse
// from the raw response text s, which must consist of newline-terminated
// JSON objects.
//
// Every line is decoded, stripped of its "_stream_id" field and encoded
// again, so the stored lines do not depend on the stream id (encoding/json
// writes map keys in sorted order). An empty s yields a response without
// log lines. The test is failed immediately if a line is not valid JSON or
// if the input does not end with a newline.
func NewLogsQLQueryResponse(t *testing.T, s string) *LogsQLQueryResponse {
	t.Helper()

	resp := &LogsQLQueryResponse{}
	if s == "" {
		return resp
	}

	buf := bytes.NewBufferString(s)
	for {
		rawLine, readErr := buf.ReadString('\n')
		switch {
		case readErr == nil:
			// A complete, newline-terminated line; normalize it below.
		case errors.Is(readErr, io.EOF):
			// Leftover bytes at EOF mean the last line had no trailing newline.
			if rawLine != "" {
				t.Fatalf("BUG: unexpected non-empty line=%q with io.EOF", rawLine)
			}
			return resp
		default:
			t.Fatalf("BUG: cannot read logline from buffer: %s", readErr)
		}

		// The map is intentionally left nil before decoding so that a JSON
		// `null` line round-trips as `null`.
		var fields map[string]any
		if err := json.Unmarshal([]byte(rawLine), &fields); err != nil {
			t.Fatalf("cannot parse log line=%q: %s", rawLine, err)
		}
		delete(fields, "_stream_id")

		normalized, err := json.Marshal(fields)
		if err != nil {
			t.Fatalf("cannot marshal parsed logline=%q: %s", rawLine, err)
		}
		resp.LogLines = append(resp.LogLines, string(normalized))
	}
}
|
||||
|
||||
@@ -52,6 +52,7 @@ func testSpecialQueryRegression(tc *apptest.TestCase, sut apptest.PrometheusWrit
|
||||
testTooBigLookbehindWindow(tc, sut)
|
||||
testMatchSeries(tc, sut)
|
||||
testNegativeIncrease(tc, sut)
|
||||
testInstantQueryWithOffsetUsingCache(tc, sut)
|
||||
|
||||
// graphite
|
||||
testComparisonNotInfNotNan(tc, sut)
|
||||
@@ -292,6 +293,45 @@ func testNegativeIncrease(tc *apptest.TestCase, sut apptest.PrometheusWriteQueri
|
||||
})
|
||||
}
|
||||
|
||||
func testInstantQueryWithOffsetUsingCache(tc *apptest.TestCase, sut apptest.PrometheusWriteQuerier) {
|
||||
t := tc.T()
|
||||
|
||||
// unexpected /api/v1/query response due to wrong applied offset to request range
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9762
|
||||
sut.PrometheusAPIV1ImportPrometheus(t, []string{
|
||||
`vm_http_requests_total 1 1758196800000`, // 2025-09-18 12:00:00
|
||||
`vm_http_requests_total 2 1758218400000`, // 2025-09-18 18:00:00
|
||||
`vm_http_requests_total 3 1758240000000`, // 2025-09-19 00:00:00
|
||||
`vm_http_requests_total 4 1758261600000`, // 2025-09-19 06:00:00
|
||||
`vm_http_requests_total 5 1758283200000`, // 2025-09-19 12:00:00
|
||||
`vm_http_requests_total 6 1758304800000`, // 2025-09-19 18:00:00
|
||||
`vm_http_requests_total 7 1758326400000`, // 2025-09-20 00:00:00
|
||||
}, apptest.QueryOpts{})
|
||||
sut.ForceFlush(t)
|
||||
|
||||
tc.Assert(&apptest.AssertOptions{
|
||||
Msg: "unexpected /api/v1/query response",
|
||||
DoNotRetry: true,
|
||||
Got: func() any {
|
||||
return sut.PrometheusAPIV1Query(t, `avg_over_time(vm_http_requests_total[1d] offset 12h)`, apptest.QueryOpts{
|
||||
Time: "2025-09-20T12:00:00.000Z",
|
||||
})
|
||||
},
|
||||
Want: &apptest.PrometheusAPIV1QueryResponse{
|
||||
Status: "success",
|
||||
Data: &apptest.QueryData{
|
||||
ResultType: "vector",
|
||||
Result: []*apptest.QueryResult{
|
||||
{
|
||||
Metric: map[string]string{},
|
||||
Sample: &apptest.Sample{Timestamp: 1758369600000, Value: 5.5},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func testComparisonNotInfNotNan(tc *apptest.TestCase, sut apptest.PrometheusWriteQuerier) {
|
||||
t := tc.T()
|
||||
|
||||
|
||||
@@ -1786,4 +1786,4 @@
|
||||
"uid": "gF-lxRdVz",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1168,7 +1168,7 @@
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.99,sum(rate(controller_runtime_reconcile_time_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by(le,controller) )",
|
||||
"expr": "histogram_quantile(0.99, sum(rate(controller_runtime_reconcile_time_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le, controller) )",
|
||||
"legendFormat": "q.99 {{controller}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
@@ -1265,7 +1265,7 @@
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(rest_client_requests_total{job=~\"$job\"}[$__interval])) by (method,code)",
|
||||
"expr": "sum(rate(rest_client_requests_total{job=~\"$job\"}[$__interval])) by (method, code)",
|
||||
"instant": false,
|
||||
"legendFormat": "{{method}} {{code}}",
|
||||
"range": true,
|
||||
@@ -1489,7 +1489,7 @@
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "max(histogram_quantile(0.99, sum(rate(go_sched_latencies_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (job, instance, le))) by(job)",
|
||||
"expr": "max(histogram_quantile(0.99, sum(rate(go_sched_latencies_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (job, instance, le))) by (job)",
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
@@ -1588,7 +1588,7 @@
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.99,sum(rate(rest_client_request_duration_seconds_bucket{job=~\"$job\"})) by(le,method,api) )",
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le, method, api))",
|
||||
"instant": false,
|
||||
"legendFormat": "{{method}} {{api}}",
|
||||
"range": true,
|
||||
@@ -2135,6 +2135,16 @@
|
||||
"skipUrlSync": false,
|
||||
"sort": 2,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"baseFilters": [],
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"filters": [],
|
||||
"name": "adhoc",
|
||||
"type": "adhoc"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
@@ -1950,6 +1950,16 @@
|
||||
],
|
||||
"query": "*",
|
||||
"type": "textbox"
|
||||
},
|
||||
{
|
||||
"baseFilters": [],
|
||||
"datasource": {
|
||||
"type": "victoriametrics-logs-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"filters": [],
|
||||
"name": "adhoc",
|
||||
"type": "adhoc"
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -1962,4 +1972,4 @@
|
||||
"title": "Query Stats (cluster)",
|
||||
"uid": "feg3od1zt1fy8e",
|
||||
"version": 1
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1787,4 +1787,4 @@
|
||||
"uid": "gF-lxRdVz_vm",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1994,7 +1994,7 @@
|
||||
"baseFilters": [],
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "PE8D8DB4BEE4E4B22"
|
||||
"uid": "$ds"
|
||||
},
|
||||
"filters": [],
|
||||
"name": "adhoc",
|
||||
|
||||
@@ -2136,6 +2136,16 @@
|
||||
"skipUrlSync": false,
|
||||
"sort": 2,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"baseFilters": [],
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"filters": [],
|
||||
"name": "adhoc",
|
||||
"type": "adhoc"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
@@ -4238,4 +4238,4 @@
|
||||
"title": "VictoriaMetrics - vmalert (VM)",
|
||||
"uid": "LzldHAVnz_vm",
|
||||
"version": 1
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2652,7 +2652,7 @@
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-datasource",
|
||||
"uid": "P38648FE0F8C5BEA2"
|
||||
"uid": "$ds"
|
||||
},
|
||||
"filters": [],
|
||||
"hide": 0,
|
||||
|
||||
@@ -7,7 +7,7 @@ ROOT_IMAGE ?= alpine:3.22.1
|
||||
ROOT_IMAGE_SCRATCH ?= scratch
|
||||
CERTS_IMAGE := alpine:3.22.1
|
||||
|
||||
GO_BUILDER_IMAGE := golang:1.25.0
|
||||
GO_BUILDER_IMAGE := golang:1.25.1
|
||||
|
||||
BUILDER_IMAGE := local/builder:2.0.0-$(shell echo $(GO_BUILDER_IMAGE) | tr :/ __)-1
|
||||
BASE_IMAGE := local/base:1.1.4-$(shell echo $(ROOT_IMAGE) | tr :/ __)-$(shell echo $(CERTS_IMAGE) | tr :/ __)
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# and forwards them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.125.1
|
||||
image: victoriametrics/vmagent:v1.126.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
@@ -19,7 +19,7 @@ services:
|
||||
restart: always
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:12.1.1
|
||||
image: grafana/grafana:12.2.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
@@ -37,14 +37,14 @@ services:
|
||||
# vmstorage shards. Each shard receives 1/N of all metrics sent to vminserts,
|
||||
# where N is number of vmstorages (2 in this case).
|
||||
vmstorage-1:
|
||||
image: victoriametrics/vmstorage:v1.125.1-cluster
|
||||
image: victoriametrics/vmstorage:v1.126.0-cluster
|
||||
volumes:
|
||||
- strgdata-1:/storage
|
||||
command:
|
||||
- "--storageDataPath=/storage"
|
||||
restart: always
|
||||
vmstorage-2:
|
||||
image: victoriametrics/vmstorage:v1.125.1-cluster
|
||||
image: victoriametrics/vmstorage:v1.126.0-cluster
|
||||
volumes:
|
||||
- strgdata-2:/storage
|
||||
command:
|
||||
@@ -54,7 +54,7 @@ services:
|
||||
# vminsert is ingestion frontend. It receives metrics pushed by vmagent,
|
||||
# pre-process them and distributes across configured vmstorage shards.
|
||||
vminsert-1:
|
||||
image: victoriametrics/vminsert:v1.125.1-cluster
|
||||
image: victoriametrics/vminsert:v1.126.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -63,7 +63,7 @@ services:
|
||||
- "--storageNode=vmstorage-2:8400"
|
||||
restart: always
|
||||
vminsert-2:
|
||||
image: victoriametrics/vminsert:v1.125.1-cluster
|
||||
image: victoriametrics/vminsert:v1.126.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -75,7 +75,7 @@ services:
|
||||
# vmselect is a query frontend. It serves read queries in MetricsQL or PromQL.
|
||||
# vmselect collects results from configured `--storageNode` shards.
|
||||
vmselect-1:
|
||||
image: victoriametrics/vmselect:v1.125.1-cluster
|
||||
image: victoriametrics/vmselect:v1.126.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -85,7 +85,7 @@ services:
|
||||
- "--vmalert.proxyURL=http://vmalert:8880"
|
||||
restart: always
|
||||
vmselect-2:
|
||||
image: victoriametrics/vmselect:v1.125.1-cluster
|
||||
image: victoriametrics/vmselect:v1.126.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -100,7 +100,7 @@ services:
|
||||
# read requests from Grafana, vmui, vmalert among vmselects.
|
||||
# It can be used as an authentication proxy.
|
||||
vmauth:
|
||||
image: victoriametrics/vmauth:v1.125.1
|
||||
image: victoriametrics/vmauth:v1.126.0
|
||||
depends_on:
|
||||
- "vmselect-1"
|
||||
- "vmselect-2"
|
||||
@@ -114,7 +114,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.125.1
|
||||
image: victoriametrics/vmalert:v1.126.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
@@ -138,7 +138,7 @@ services:
|
||||
# alertmanager receives alerting notifications from vmalert
|
||||
# and distributes them according to --config.file.
|
||||
alertmanager:
|
||||
image: prom/alertmanager:v0.28.0
|
||||
image: prom/alertmanager:v0.28.1
|
||||
volumes:
|
||||
- ./alertmanager.yml:/config/alertmanager.yml
|
||||
command:
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# and forwards them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.125.1
|
||||
image: victoriametrics/vmagent:v1.126.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -18,7 +18,7 @@ services:
|
||||
# VictoriaMetrics instance, a single process responsible for
|
||||
# storing metrics and serving read requests.
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.125.1
|
||||
image: victoriametrics/victoria-metrics:v1.126.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
- 8089:8089
|
||||
@@ -38,7 +38,7 @@ services:
|
||||
restart: always
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:12.1.1
|
||||
image: grafana/grafana:12.2.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -54,7 +54,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.125.1
|
||||
image: victoriametrics/vmalert:v1.126.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
- "alertmanager"
|
||||
@@ -79,7 +79,7 @@ services:
|
||||
# alertmanager receives alerting notifications from vmalert
|
||||
# and distributes them according to --config.file.
|
||||
alertmanager:
|
||||
image: prom/alertmanager:v0.28.0
|
||||
image: prom/alertmanager:v0.28.1
|
||||
volumes:
|
||||
- ./alertmanager.yml:/config/alertmanager.yml
|
||||
command:
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
groups:
|
||||
- name: log-rules
|
||||
type: vlogs
|
||||
interval: 30s
|
||||
rules:
|
||||
- alert: AlwaysFiring
|
||||
expr: '* | stats count()'
|
||||
annotations:
|
||||
description: "Generated more than {{$value}} log entries in the last 1 minute"
|
||||
- alert: TooManyLogs
|
||||
expr: '* | stats by (path) count() as total | filter total:>50'
|
||||
annotations:
|
||||
description: "Path {{$labels.path}} generated more than 50 log entries in the last 1 minute: {{$value}}"
|
||||
- record: path:logs:count
|
||||
expr: '* | stats by (path) count()'
|
||||
@@ -1,6 +1,6 @@
|
||||
services:
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.125.1
|
||||
image: victoriametrics/vmagent:v1.126.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -14,7 +14,7 @@ services:
|
||||
restart: always
|
||||
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.125.1
|
||||
image: victoriametrics/victoria-metrics:v1.126.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -27,7 +27,7 @@ services:
|
||||
restart: always
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:12.1.1
|
||||
image: grafana/grafana:12.2.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -40,7 +40,7 @@ services:
|
||||
restart: always
|
||||
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.125.1
|
||||
image: victoriametrics/vmalert:v1.126.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -73,7 +73,7 @@ services:
|
||||
- "/config.yaml"
|
||||
- "--licenseFile=/license"
|
||||
alertmanager:
|
||||
image: prom/alertmanager:v0.28.0
|
||||
image: prom/alertmanager:v0.28.1
|
||||
volumes:
|
||||
- ./alertmanager.yml:/config/alertmanager.yml
|
||||
command:
|
||||
@@ -83,7 +83,7 @@ services:
|
||||
restart: always
|
||||
|
||||
node-exporter:
|
||||
image: quay.io/prometheus/node-exporter:v1.7.0
|
||||
image: quay.io/prometheus/node-exporter:v1.9.1
|
||||
ports:
|
||||
- 9100:9100
|
||||
pid: host
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
prepare-logs:
|
||||
cd ./source_logs && bash download.sh
|
||||
|
||||
docker-up-elk:
|
||||
docker-compose -f docker-compose.yml -f docker-compose-elk.yml up -d
|
||||
|
||||
docker-stop-elk:
|
||||
docker-compose -f docker-compose.yml -f docker-compose-elk.yml stop
|
||||
|
||||
docker-up-loki:
|
||||
docker-compose -f docker-compose.yml -f docker-compose-loki.yml up -d
|
||||
|
||||
docker-stop-loki:
|
||||
docker-compose -f docker-compose.yml -f docker-compose-loki.yml stop
|
||||
|
||||
docker-cleanup:
|
||||
docker-compose -f docker-compose.yml -f docker-compose-elk.yml -f docker-compose-loki.yml down -v --remove-orphans
|
||||
@@ -1,69 +0,0 @@
|
||||
version: "3"
|
||||
|
||||
services:
|
||||
filebeat-elastic:
|
||||
image: docker.elastic.co/beats/filebeat:8.8.0
|
||||
restart: on-failure
|
||||
volumes:
|
||||
- ./elk/filebeat/filebeat-elastic.yml:/usr/share/filebeat/filebeat.yml:ro
|
||||
depends_on:
|
||||
- elastic
|
||||
|
||||
filebeat-vlogs:
|
||||
image: docker.elastic.co/beats/filebeat:8.8.0
|
||||
restart: on-failure
|
||||
volumes:
|
||||
- ./elk/filebeat/filebeat-vlogs.yml:/usr/share/filebeat/filebeat.yml:ro
|
||||
depends_on:
|
||||
- vlogs
|
||||
|
||||
generator:
|
||||
image: golang:1.25.0-alpine
|
||||
restart: always
|
||||
working_dir: /go/src/app
|
||||
volumes:
|
||||
- ./generator:/go/src/app
|
||||
- ./source_logs:/go/src/source_logs
|
||||
command:
|
||||
- go
|
||||
- run
|
||||
- main.go
|
||||
- -logsPath=/go/src/source_logs/logs
|
||||
- -outputRateLimitItems=10000
|
||||
- -syslog.addr=filebeat-elastic:12345
|
||||
- -syslog.addr2=filebeat-vlogs:12345
|
||||
- -logs.randomSuffix=false
|
||||
depends_on: [filebeat-elastic, filebeat-vlogs]
|
||||
|
||||
elastic:
|
||||
image: docker.elastic.co/elasticsearch/elasticsearch:8.8.0
|
||||
volumes:
|
||||
- ./elk/elastic/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml
|
||||
- elastic:/usr/share/elasticsearch/data
|
||||
environment:
|
||||
ES_JAVA_OPTS: "-Xmx2048m"
|
||||
|
||||
kibana:
|
||||
image: docker.elastic.co/kibana/kibana:8.8.0
|
||||
volumes:
|
||||
- ./elk/kibana/kibana.yml:/usr/share/kibana/config/kibana.yml
|
||||
ports:
|
||||
- "5601:5601"
|
||||
depends_on: [elastic]
|
||||
|
||||
beat-exporter-elastic:
|
||||
image: trustpilot/beat-exporter:0.4.0
|
||||
command:
|
||||
- -beat.uri=http://filebeat-elastic:5066
|
||||
depends_on:
|
||||
- filebeat-elastic
|
||||
|
||||
beat-exporter-vlogs:
|
||||
image: trustpilot/beat-exporter:0.4.0
|
||||
command:
|
||||
- -beat.uri=http://filebeat-vlogs:5066
|
||||
depends_on:
|
||||
- filebeat-vlogs
|
||||
|
||||
volumes:
|
||||
elastic:
|
||||
@@ -1,51 +0,0 @@
|
||||
version: "3"
|
||||
|
||||
services:
|
||||
generator:
|
||||
image: golang:1.25.0-alpine
|
||||
restart: always
|
||||
working_dir: /go/src/app
|
||||
volumes:
|
||||
- ./generator:/go/src/app
|
||||
- ./source_logs:/go/src/source_logs
|
||||
command:
|
||||
- go
|
||||
- run
|
||||
- main.go
|
||||
- -logsPath=/go/src/source_logs/logs
|
||||
- -outputRateLimitItems=10000
|
||||
- -outputRateLimitPeriod=1s
|
||||
- -syslog.addr=rsyslog:514
|
||||
- -syslog.addr2=rsyslog:514
|
||||
- -logs.randomSuffix=false
|
||||
depends_on: [rsyslog]
|
||||
|
||||
loki:
|
||||
image: grafana/loki:2.9.0
|
||||
user: 0:0
|
||||
ports:
|
||||
- "3100:3100"
|
||||
command: -config.file=/etc/loki/loki-config.yaml
|
||||
volumes:
|
||||
- loki:/tmp/loki
|
||||
- ./loki/:/etc/loki/
|
||||
|
||||
promtail:
|
||||
image: grafana/promtail:2.9.0
|
||||
command: -config.file=/etc/promtail/promtail-config.yaml
|
||||
volumes:
|
||||
- ./loki/:/etc/promtail/
|
||||
depends_on:
|
||||
- loki
|
||||
- vlogs
|
||||
|
||||
rsyslog:
|
||||
build:
|
||||
dockerfile: Dockerfile
|
||||
context: rsyslog
|
||||
volumes:
|
||||
- ./rsyslog/rsyslog.conf:/etc/rsyslog.conf
|
||||
depends_on: [promtail]
|
||||
|
||||
volumes:
|
||||
loki:
|
||||
@@ -1,74 +0,0 @@
|
||||
version: "3"
|
||||
|
||||
services:
|
||||
# Run `make package-victoria-logs` to build victoria-logs image
|
||||
vlogs:
|
||||
image: docker.io/victoriametrics/victoria-logs:v1.24.0-victorialogs
|
||||
volumes:
|
||||
- vlogs:/vlogs
|
||||
ports:
|
||||
- "9428:9428"
|
||||
command:
|
||||
- -storageDataPath=/vlogs
|
||||
|
||||
cadvisor:
|
||||
image: gcr.io/cadvisor/cadvisor:v0.47.0
|
||||
restart: unless-stopped
|
||||
privileged: true
|
||||
volumes:
|
||||
- /:/rootfs:ro
|
||||
- /var/run:/var/run:ro
|
||||
- /sys:/sys:ro
|
||||
- /var/lib/docker/:/var/lib/docker:ro
|
||||
- /dev/disk/:/dev/disk:ro
|
||||
|
||||
node-exporter:
|
||||
image: prom/node-exporter:latest
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- /proc:/host/proc:ro
|
||||
- /sys:/host/sys:ro
|
||||
- /:/rootfs:ro
|
||||
command:
|
||||
- "--path.procfs=/host/proc"
|
||||
- "--path.rootfs=/rootfs"
|
||||
- "--path.sysfs=/host/sys"
|
||||
- "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)"
|
||||
|
||||
du-exporter:
|
||||
image: ghcr.io/dundee/disk_usage_exporter/disk_usage_exporter-c4084307c537335c2ddb6f4b9b527422:latest
|
||||
restart: unless-stopped
|
||||
user: "root"
|
||||
volumes:
|
||||
- /var/lib/docker/volumes:/var/lib/docker/volumes:ro
|
||||
- ./du/config.yml:/config.yml:ro
|
||||
command:
|
||||
- "--config=/config.yml"
|
||||
|
||||
vmsingle:
|
||||
image: victoriametrics/victoria-metrics:v1.109.0
|
||||
ports:
|
||||
- "8428:8428"
|
||||
command:
|
||||
- -storageDataPath=/vmsingle
|
||||
- -promscrape.config=/promscrape.yml
|
||||
- -promscrape.maxScrapeSize=1Gb
|
||||
volumes:
|
||||
- vmsingle:/vmsingle
|
||||
- ./vmsingle/promscrape.yml:/promscrape.yml
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:12.1.1
|
||||
depends_on: [vmsingle]
|
||||
ports:
|
||||
- 3000:3000
|
||||
volumes:
|
||||
- grafanadata:/var/lib/grafana
|
||||
- ./grafana/provisioning/:/etc/grafana/provisioning/
|
||||
- ./grafana/dashboards:/var/lib/grafana/dashboards/
|
||||
restart: always
|
||||
|
||||
volumes:
|
||||
vlogs:
|
||||
vmsingle:
|
||||
grafanadata: {}
|
||||
@@ -1,3 +0,0 @@
|
||||
analyzed-path: /var/lib/docker/volumes
|
||||
bind-address: 0.0.0.0:9995
|
||||
dir-level: 1
|
||||
@@ -1,4 +0,0 @@
|
||||
cluster.name: "bench"
|
||||
network.host: 0.0.0.0
|
||||
xpack.security.enabled: false
|
||||
discovery.type: single-node
|
||||
@@ -1,15 +0,0 @@
|
||||
filebeat.inputs:
|
||||
- type: syslog
|
||||
format: rfc3164
|
||||
protocol.tcp:
|
||||
host: "0.0.0.0:12345"
|
||||
|
||||
output.elasticsearch:
|
||||
hosts: [ "http://elastic:9200" ]
|
||||
worker: 5
|
||||
bulk_max_size: 1000
|
||||
|
||||
http:
|
||||
enabled: true
|
||||
host: 0.0.0.0
|
||||
port: 5066
|
||||
@@ -1,19 +0,0 @@
|
||||
filebeat.inputs:
|
||||
- type: syslog
|
||||
format: rfc3164
|
||||
protocol.tcp:
|
||||
host: "0.0.0.0:12345"
|
||||
|
||||
output.elasticsearch:
|
||||
hosts: [ "http://vlogs:9428/insert/elasticsearch/" ]
|
||||
worker: 5
|
||||
bulk_max_size: 1000
|
||||
parameters:
|
||||
_msg_field: "message"
|
||||
_time_field: "@timestamp"
|
||||
_stream_fields: "host.name,process.program,process.pid"
|
||||
|
||||
http:
|
||||
enabled: true
|
||||
host: 0.0.0.0
|
||||
port: 5066
|
||||
@@ -1,3 +0,0 @@
|
||||
server.name: kibana
|
||||
server.host: "0.0.0.0"
|
||||
elasticsearch.hosts: [ "http://elastic:9200" ]
|
||||
@@ -1,106 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/binary"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"log/syslog"
|
||||
"math/rand"
|
||||
"net"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Command-line flags of the log generator.
var (
	// logsPath is the directory scanned for *.log source files.
	logsPath = flag.String("logsPath", "", "Path to logs directory")
	// syslogAddr and syslogAddr2 are the two TCP syslog endpoints; every
	// line is sent to both of them.
	syslogAddr  = flag.String("syslog.addr", "logstash:12345", "Addr to send logs to")
	syslogAddr2 = flag.String("syslog.addr2", "logstash:12345", "Second addr to send the same logs to")
	// randomSuffix appends a random IPv4-looking token to every line.
	randomSuffix = flag.Bool("logs.randomSuffix", false, "Whether to add a random suffix to a log line")

	// Together these two flags define the rate limit: at most
	// outputRateLimitItems lines are released per outputRateLimitPeriod.
	outputRateLimitItems  = flag.Int("outputRateLimitItems", 100, "Number of items to send per -outputRateLimitPeriod")
	outputRateLimitPeriod = flag.Duration("outputRateLimitPeriod", time.Second, "Period of time over which -outputRateLimitItems items are sent")
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
startedAt := time.Now().Unix()
|
||||
|
||||
logFiles, err := os.ReadDir(*logsPath)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("error reading directory %s:%w", *logsPath, err))
|
||||
}
|
||||
|
||||
sourceFiles := make([]string, 0)
|
||||
|
||||
for _, logFile := range logFiles {
|
||||
if strings.HasSuffix(logFile.Name(), ".log") {
|
||||
sourceFiles = append(sourceFiles, logFile.Name())
|
||||
}
|
||||
}
|
||||
log.Printf("sourceFiles: %v", sourceFiles)
|
||||
log.Printf("running with rate limit: %d items per %s", *outputRateLimitItems, *outputRateLimitPeriod)
|
||||
|
||||
limitTicker := time.NewTicker(*outputRateLimitPeriod)
|
||||
limitItems := *outputRateLimitItems
|
||||
limiter := make(chan struct{}, limitItems)
|
||||
go func() {
|
||||
for {
|
||||
<-limitTicker.C
|
||||
for i := 0; i < limitItems; i++ {
|
||||
limiter <- struct{}{}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
for _, sourceFile := range sourceFiles {
|
||||
log.Printf("sourceFile: %s", sourceFile)
|
||||
f, err := os.Open(*logsPath + "/" + sourceFile)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
syslogTag := "logs-benchmark-" + sourceFile + "-" + strconv.FormatInt(startedAt, 10)
|
||||
|
||||
// Loki uses RFC5424 syslog format, which has a 48 character limit on the tag.
|
||||
tagLen := len(syslogTag)
|
||||
if tagLen > 48 {
|
||||
truncate := tagLen - 48
|
||||
syslogTag = syslogTag[truncate:]
|
||||
}
|
||||
logger, err := syslog.Dial("tcp", *syslogAddr, syslog.LOG_INFO, syslogTag)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("error dialing syslog: %w", err))
|
||||
}
|
||||
logger2, err := syslog.Dial("tcp", *syslogAddr2, syslog.LOG_INFO, syslogTag)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("error dialing syslog: %w", err))
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
<-limiter
|
||||
line := scanner.Text()
|
||||
if *randomSuffix {
|
||||
line = line + " " + randomString()
|
||||
}
|
||||
_ = logger.Info(line)
|
||||
_ = logger2.Info(line)
|
||||
}
|
||||
|
||||
logger.Close()
|
||||
logger2.Close()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// randomString returns a random 32-bit value rendered as a dotted-quad
// IPv4 address string.
func randomString() string {
	var raw [4]byte
	binary.LittleEndian.PutUint32(raw[:], rand.Uint32())
	return net.IP(raw[:]).String()
}
|
||||
@@ -1,393 +0,0 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 1,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": ".*vlogs.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "victoria-logs"
|
||||
},
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "purple",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": ".*elastic.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "elasticsearch"
|
||||
},
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "yellow",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 11,
|
||||
"w": 5,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"minVizHeight": 10,
|
||||
"minVizWidth": 0,
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"showUnfilled": true
|
||||
},
|
||||
"pluginVersion": "9.2.7",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(container_cpu_usage_seconds_total{name=~\"$containers\"}[5m])) by (name) * 100",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "CPU Usage",
|
||||
"type": "bargauge"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": ".*vlogs.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "purple",
|
||||
"mode": "fixed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "victoria-logs"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": ".*elastic.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "yellow",
|
||||
"mode": "fixed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "elasticsearch"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 11,
|
||||
"w": 5,
|
||||
"x": 5,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"minVizHeight": 10,
|
||||
"minVizWidth": 0,
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"showUnfilled": true
|
||||
},
|
||||
"pluginVersion": "9.2.7",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(container_memory_rss{name=~\"$containers\"}[5m]) by (name)",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Memory Usage",
|
||||
"type": "bargauge"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "decbytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": ".*vlogs.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "purple",
|
||||
"mode": "fixed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "victoria-logs"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": ".*elastic.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "yellow",
|
||||
"mode": "fixed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "elasticsearch"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 11,
|
||||
"w": 5,
|
||||
"x": 10,
|
||||
"y": 0
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"minVizHeight": 10,
|
||||
"minVizWidth": 0,
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"showUnfilled": true
|
||||
},
|
||||
"pluginVersion": "9.2.7",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(node_disk_usage_bytes{path=~\"$containers_selector\"}[5m]) by (path)",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Disk space used",
|
||||
"type": "bargauge"
|
||||
}
|
||||
],
|
||||
"refresh": false,
|
||||
"schemaVersion": 37,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"hide": 2,
|
||||
"name": "containers_selector",
|
||||
"query": ".*(vlogs|elastic).*",
|
||||
"skipUrlSync": false,
|
||||
"type": "constant"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": [
|
||||
"logs-benchmark-elastic-1",
|
||||
"logs-benchmark-vlogs-1"
|
||||
],
|
||||
"value": [
|
||||
"logs-benchmark-elastic-1",
|
||||
"logs-benchmark-vlogs-1"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"definition": "label_values(container_cpu_usage_seconds_total{name!=\"\",}, name)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "",
|
||||
"multi": true,
|
||||
"name": "containers",
|
||||
"options": [],
|
||||
"query": {
|
||||
"query": "label_values(container_cpu_usage_seconds_total{name!=\"\",}, name)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": ".*vlogs|elastic.*",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-30m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Elastic vs VLogs - stats only",
|
||||
"uid": "hkm6P6_4J",
|
||||
"version": 2,
|
||||
"weekStart": ""
|
||||
}
|
||||
@@ -1,554 +0,0 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 1,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"last"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(filebeat_libbeat_output_events{type=\"acked\"})) by (instance, type)",
|
||||
"legendFormat": "{{instance}} - {{type}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Filebeat items",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": ".*vlogs.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "yellow",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": ".*elastic.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "green",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 11,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"min",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(container_cpu_usage_seconds_total{name=~\"$containers\"})) by (name)",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "CPU cores usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": ".*vlogs.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "yellow",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": ".*elastic.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "green",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 11,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 7
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"min",
|
||||
"max",
|
||||
"mean"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(container_memory_rss{name=~\"$containers\"}) by (name)",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Memory Usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "decbytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": ".*vlogs.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "yellow",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": ".*elastic.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "green",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 11,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 18
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"min",
|
||||
"max",
|
||||
"mean"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(node_disk_usage_bytes{path=~\"$containers_selector\"}) by (path)",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Disk space used",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 37,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"hide": 2,
|
||||
"name": "containers_selector",
|
||||
"query": ".*(vlogs|elastic).*",
|
||||
"skipUrlSync": false,
|
||||
"type": "constant"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"definition": "label_values(container_cpu_usage_seconds_total{name!=\"\",}, name)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "",
|
||||
"multi": true,
|
||||
"name": "containers",
|
||||
"options": [],
|
||||
"query": {
|
||||
"query": "label_values(container_cpu_usage_seconds_total{name!=\"\",}, name)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": ".*benchmark-vlogs|benchmark-elastic.*",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-15m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Elastic vs VLogs",
|
||||
"uid": "hkm6P6_4z",
|
||||
"version": 3,
|
||||
"weekStart": ""
|
||||
}
|
||||
@@ -1,597 +0,0 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 1,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": ".*VictoriaLogs.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "yellow",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": ".*Loki.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "green",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"last"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vl_rows_ingested_total))",
|
||||
"legendFormat": "VictoriaLogs",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "rate(loki_distributor_lines_received_total)",
|
||||
"hide": false,
|
||||
"legendFormat": "Loki",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Ingestion rate",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": ".*vlogs.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "yellow",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": ".*loki.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "green",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 11,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"min",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(container_cpu_usage_seconds_total{name=~\"$containers\"})) by (name)",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "CPU cores usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": ".*vlogs.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "yellow",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": ".*loki.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "green",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 11,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 7
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"min",
|
||||
"max",
|
||||
"mean"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(container_memory_rss{name=~\"$containers\"}) by (name)",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Memory Usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "decbytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": ".*vlogs.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "yellow",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": ".*loki.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "green",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 11,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 18
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"min",
|
||||
"max",
|
||||
"mean"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(node_disk_usage_bytes{path=~\"$containers_selector\"}) by (path)",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Disk space used",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 37,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"hide": 2,
|
||||
"name": "containers_selector",
|
||||
"query": ".*(vlogs|loki).*",
|
||||
"skipUrlSync": false,
|
||||
"type": "constant"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "P4169E866C3094E38"
|
||||
},
|
||||
"definition": "label_values(container_cpu_usage_seconds_total{name!=\"\",}, name)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "",
|
||||
"multi": true,
|
||||
"name": "containers",
|
||||
"options": [],
|
||||
"query": {
|
||||
"query": "label_values(container_cpu_usage_seconds_total{name!=\"\",}, name)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": ".*benchmark-vlogs|benchmark-loki.*",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Loki vs VLogs",
|
||||
"uid": "hkm6P6_4y",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: Prometheus
|
||||
orgId: 1
|
||||
folder: ''
|
||||
type: file
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards
|
||||
@@ -1,8 +0,0 @@
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: VictoriaMetrics
|
||||
type: prometheus
|
||||
access: proxy
|
||||
url: http://vmsingle:8428
|
||||
isDefault: true
|
||||
@@ -1,63 +0,0 @@
|
||||
auth_enabled: false
|
||||
|
||||
server:
|
||||
http_listen_port: 3100
|
||||
grpc_listen_port: 9096
|
||||
|
||||
common:
|
||||
instance_addr: 0.0.0.0
|
||||
path_prefix: /tmp/loki
|
||||
storage:
|
||||
filesystem:
|
||||
chunks_directory: /tmp/loki/chunks
|
||||
rules_directory: /tmp/loki/rules
|
||||
replication_factor: 1
|
||||
ring:
|
||||
kvstore:
|
||||
store: inmemory
|
||||
|
||||
query_range:
|
||||
results_cache:
|
||||
cache:
|
||||
embedded_cache:
|
||||
enabled: true
|
||||
max_size_mb: 100
|
||||
|
||||
schema_config:
|
||||
configs:
|
||||
- from: 2020-10-24
|
||||
store: boltdb-shipper
|
||||
object_store: filesystem
|
||||
schema: v11
|
||||
index:
|
||||
prefix: index_
|
||||
period: 24h
|
||||
|
||||
ruler:
|
||||
alertmanager_url: http://localhost:9093
|
||||
|
||||
# By default, Loki will send anonymous, but uniquely-identifiable usage and configuration
|
||||
# analytics to Grafana Labs. These statistics are sent to https://stats.grafana.org/
|
||||
#
|
||||
# Statistics help us better understand how Loki is used, and they show us performance
|
||||
# levels for most users. This helps us prioritize features and documentation.
|
||||
# For more information on what's sent, look at
|
||||
# https://github.com/grafana/loki/blob/main/pkg/usagestats/stats.go
|
||||
# Refer to the buildReport method to see what goes into a report.
|
||||
#
|
||||
# If you would like to disable reporting, uncomment the following lines:
|
||||
analytics:
|
||||
reporting_enabled: false
|
||||
|
||||
|
||||
limits_config:
|
||||
ingestion_rate_mb: 10240
|
||||
ingestion_burst_size_mb: 10240
|
||||
max_streams_per_user: 10000000
|
||||
max_global_streams_per_user: 10000000
|
||||
|
||||
retention_period: 30d
|
||||
|
||||
per_stream_rate_limit: 10240M
|
||||
per_stream_rate_limit_burst: 10240M
|
||||
cardinality_limit: 20000000
|
||||
@@ -1,26 +0,0 @@
|
||||
server:
|
||||
http_listen_port: 9080
|
||||
grpc_listen_port: 0
|
||||
|
||||
positions:
|
||||
filename: /tmp/positions.yaml
|
||||
|
||||
clients:
|
||||
- url: http://loki:3100/loki/api/v1/push
|
||||
- url: http://vlogs:9428/insert/loki/api/v1/push?_stream_fields=hostname,application
|
||||
# batchwait: 5s
|
||||
# batchsize: 5242880
|
||||
|
||||
scrape_configs:
|
||||
- job_name: syslog
|
||||
syslog:
|
||||
listen_address: 0.0.0.0:5140
|
||||
idle_timeout: 12h
|
||||
use_incoming_timestamp: true
|
||||
labels:
|
||||
job: syslog
|
||||
relabel_configs:
|
||||
- source_labels: [__syslog_message_hostname]
|
||||
target_label: hostname
|
||||
- source_labels: [__syslog_message_app_name]
|
||||
target_label: application
|
||||
@@ -1,125 +0,0 @@
|
||||
# Benchmark for VictoriaLogs
|
||||
|
||||
Benchmark compares VictoriaLogs with ELK stack and Grafana Loki.
|
||||
|
||||
Benchmark is based on:
|
||||
|
||||
- Logs from this repository - [https://github.com/logpai/loghub](https://github.com/logpai/loghub)
|
||||
- [logs generator](./generator)
|
||||
|
||||
For ELK suite it uses:
|
||||
|
||||
- filebeat - [https://www.elastic.co/beats/filebeat](https://www.elastic.co/beats/filebeat)
|
||||
- elastic + kibana
|
||||
|
||||
For Grafana Loki suite it uses:
|
||||
|
||||
- [Promtail](https://grafana.com/docs/loki/latest/send-data/promtail/)
|
||||
- rsyslog - required to push logs in RFC5424 format to Promtail
|
||||
- [Loki](https://grafana.com/oss/loki/)
|
||||
|
||||
## How it works
|
||||
|
||||
[docker-compose.yml](./docker-compose.yml) contains common configurations for all suites:
|
||||
|
||||
- VictoriaLogs instance
|
||||
- vmsingle - port forwarded to `localhost:8428` to see UI
|
||||
- exporters for system metrics
|
||||
|
||||
ELK suite uses [docker-compose-elk.yml](./docker-compose-elk.yml) with the following services:
|
||||
|
||||
- [logs generator](./generator) which generates logs and sends them to filebeat instances via syslog
|
||||
- 2 filebeat instances - one for elastic and one for VictoriaLogs.
|
||||
- elastic instance
|
||||
- kibana instance - port forwarded to `localhost:5601` to see UI
|
||||
|
||||
Loki suite uses [docker-compose-loki.yml](./docker-compose-loki.yml) with the following services:
|
||||
|
||||
- [logs generator](./generator) which generates logs and sends them to rsyslog
|
||||
- rsyslog instance - sends logs to Promtail
|
||||
- Promtail instance - sends logs to Loki and VictoriaLogs
|
||||
- Loki instance
|
||||
|
||||
[Logs generator](./generator) generates logs based on logs located at `./source_logs/logs` and sends them to filebeat
|
||||
instances via syslog.
|
||||
Logs are generated by reading files line by line, adding randomized suffix to each line and sending them to filebeat via
|
||||
syslog.
|
||||
By default, generator will exit once all files are read. `docker-compose` will restart it and files will be read again
|
||||
generating new logs.
|
||||
|
||||
Each filebeat then writes logs to elastic and VictoriaLogs via elasticsearch-compatible API.
|
||||
|
||||
## How to run
|
||||
|
||||
1. Download and unarchive logs by running:
|
||||
|
||||
```shell
|
||||
cd source_logs
|
||||
bash download.sh
|
||||
```
|
||||
|
||||
Note that with logs listed in `download.sh` it will require 49GB of free space:
|
||||
|
||||
- 3GB for archives
|
||||
- 46GB for unarchived logs
|
||||
|
||||
If it is needed to minimize disk footprint, you can download only some of them by commenting out lines in `download.sh`.
|
||||
Unarchived logs size per file for reference:
|
||||
|
||||
```shell
|
||||
2.3M Linux.log
|
||||
73M SSH.log
|
||||
32G Thunderbird.log
|
||||
5.1M Apache.log
|
||||
13G hadoop-*.log
|
||||
```
|
||||
|
||||
1. (optional) If needed, adjust amount of logs sent by generator by modifying `-outputRateLimitItems` and
|
||||
`-outputRateLimitPeriod` parameters in [docker-compose.yml](./docker-compose.yml). By default, it is configured to
|
||||
send 10000 logs per second.
|
||||
|
||||
1. (optional) Build victoria-logs image and adjust `image` parameter in [docker-compose.yml](./docker-compose.yml):
|
||||
|
||||
```shell
|
||||
make package-victoria-logs
|
||||
```
|
||||
|
||||
Image name should be replaced at `vlogs` service in [docker-compose.yml](./docker-compose.yml).
|
||||
|
||||
It is also possible to configure filebeat to send logs to VictoriaLogs running on local machine.
|
||||
To do this modify [filebeat config for vlogs](./elk/filebeat/filebeat-vlogs.yml) and replace `vlogs` address
|
||||
with address of local VictoriaLogs instance:
|
||||
|
||||
```yaml
|
||||
output.elasticsearch:
|
||||
hosts: [ "http://vlogs:9428/insert/elasticsearch/" ]
|
||||
```
|
||||
|
||||
1. Choose a suite to run.
|
||||
|
||||
In order to run ELK suite use the following command:
|
||||
|
||||
```sh
|
||||
make docker-up-elk
|
||||
```
|
||||
|
||||
In order to run Loki suite use the following command:
|
||||
|
||||
```sh
|
||||
make docker-up-loki
|
||||
```
|
||||
|
||||
1. Navigate to `http://localhost:3000/` to see Grafana dashboards with resource usage
|
||||
comparison.
|
||||
|
||||
Navigate to `http://localhost:3000/d/hkm6P6_4z/elastic-vs-vlogs` to see ELK suite results.
|
||||
|
||||
Navigate to `http://localhost:3000/d/hkm6P6_4y/loki-vs-vlogs` to see Loki suite results.
|
||||
|
||||
Example results vs ELK:
|
||||
|
||||

|
||||
|
||||
Example results vs Loki:
|
||||
|
||||

|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 157 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 176 KiB |
@@ -1,9 +0,0 @@
|
||||
FROM alpine:3
|
||||
RUN apk add --no-cache rsyslog rsyslog-tls tzdata
|
||||
|
||||
COPY rsyslog.conf /etc/rsyslog.conf
|
||||
|
||||
VOLUME /var/run/rsyslog/dev
|
||||
EXPOSE 514 10514
|
||||
|
||||
CMD ["/usr/sbin/rsyslogd", "-n"]
|
||||
@@ -1,12 +0,0 @@
|
||||
module(load="imudp")
|
||||
input(type="imudp" port="514")
|
||||
|
||||
module(load="imtcp")
|
||||
input(type="imtcp" port="514")
|
||||
|
||||
*.* action(type="omfwd"
|
||||
protocol="tcp" target="promtail" port="5140"
|
||||
Template="RSYSLOG_SyslogProtocol23Format"
|
||||
TCP_Framing="octet-counted" KeepAlive="on"
|
||||
action.resumeRetryCount="-1"
|
||||
queue.type="linkedlist" queue.size="1000000")
|
||||
@@ -1,2 +0,0 @@
|
||||
*.log
|
||||
*.tar.gz
|
||||
@@ -1,34 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
|
||||
# Unarchived size: 5.1M Apache.log
|
||||
if [ ! -f Apache.tar.gz ]; then
|
||||
curl -o Apache.tar.gz -L -C - https://zenodo.org/records/3227177/files/Apache.tar.gz?download=1
|
||||
fi
|
||||
|
||||
# Unarchived size: 13G hadoop-*.log
|
||||
if [ ! -f HDFS_2.tar.gz ]; then
|
||||
curl -o HDFS_2.tar.gz -L -C - https://zenodo.org/records/3227177/files/HDFS_2.tar.gz?download=1
|
||||
fi
|
||||
|
||||
# Unarchived size: 2.3M Linux.log
|
||||
if [ ! -f Linux.tar.gz ]; then
|
||||
curl -o Linux.tar.gz -L -C - https://zenodo.org/records/3227177/files/Linux.tar.gz?download=1
|
||||
fi
|
||||
|
||||
# Unarchived size: 32G Thunderbird.log
|
||||
if [ ! -f Thunderbird.tar.gz ]; then
|
||||
curl -o Thunderbird.tar.gz -L -C - https://zenodo.org/records/3227177/files/Thunderbird.tar.gz?download=1
|
||||
fi
|
||||
|
||||
# Unarchived size: 73M SSH.log
|
||||
if [ ! -f SSH.tar.gz ]; then
|
||||
curl -o SSH.tar.gz -L -C - https://zenodo.org/records/3227177/files/SSH.tar.gz?download=1
|
||||
fi
|
||||
|
||||
mkdir -p logs
|
||||
|
||||
for file in *.tar.gz; do
|
||||
tar -xzf $file -C logs
|
||||
done
|
||||
@@ -1,30 +0,0 @@
|
||||
scrape_configs:
|
||||
- job_name: "filebeat"
|
||||
scrape_interval: 30s
|
||||
static_configs:
|
||||
- targets:
|
||||
- beat-exporter-elastic:9479
|
||||
- beat-exporter-vlogs:9479
|
||||
- job_name: "victoria-logs"
|
||||
scrape_interval: 30s
|
||||
static_configs:
|
||||
- targets:
|
||||
- vlogs:9428
|
||||
- job_name: "cadvisor"
|
||||
scrape_interval: 30s
|
||||
metric_relabel_configs:
|
||||
- action: labeldrop
|
||||
regex: "container_label_.*"
|
||||
static_configs:
|
||||
- targets:
|
||||
- cadvisor:8080
|
||||
- job_name: 'node'
|
||||
static_configs:
|
||||
- targets: ['node-exporter:9100']
|
||||
- targets: ['du-exporter:9995']
|
||||
- job_name: 'loki'
|
||||
static_configs:
|
||||
- targets: ['loki:3100']
|
||||
- job_name: 'promtail'
|
||||
static_configs:
|
||||
- targets: ['promtail:9080']
|
||||
@@ -10,9 +10,9 @@ sitemap:
|
||||
|
||||
- To use *vmanomaly*, part of the enterprise package, a license key is required. Obtain your key [here](https://victoriametrics.com/products/enterprise/trial/) for this tutorial or for enterprise use.
|
||||
- In the tutorial, we'll be using the following VictoriaMetrics components:
|
||||
- [VictoriaMetrics Single-Node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) (v1.125.1)
|
||||
- [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) (v1.125.1)
|
||||
- [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) (v1.125.1)
|
||||
- [VictoriaMetrics Single-Node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) (v1.126.0)
|
||||
- [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) (v1.126.0)
|
||||
- [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) (v1.126.0)
|
||||
- [Grafana](https://grafana.com/) (v.10.2.1)
|
||||
- [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/)
|
||||
- [Node exporter](https://github.com/prometheus/node_exporter#node-exporter) (v1.7.0) and [Alertmanager](https://prometheus.io/docs/alerting/latest/alertmanager/) (v0.27.0)
|
||||
@@ -323,7 +323,7 @@ Let's wrap it all up together into the `docker-compose.yml` file.
|
||||
services:
|
||||
vmagent:
|
||||
container_name: vmagent
|
||||
image: victoriametrics/vmagent:v1.125.1
|
||||
image: victoriametrics/vmagent:v1.126.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -340,7 +340,7 @@ services:
|
||||
|
||||
victoriametrics:
|
||||
container_name: victoriametrics
|
||||
image: victoriametrics/victoria-metrics:v1.125.1
|
||||
image: victoriametrics/victoria-metrics:v1.126.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -373,7 +373,7 @@ services:
|
||||
|
||||
vmalert:
|
||||
container_name: vmalert
|
||||
image: victoriametrics/vmalert:v1.125.1
|
||||
image: victoriametrics/vmalert:v1.126.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -249,27 +249,27 @@ services:
|
||||
- grafana_data:/var/lib/grafana/
|
||||
|
||||
vmsingle:
|
||||
image: victoriametrics/victoria-metrics:v1.125.1
|
||||
image: victoriametrics/victoria-metrics:v1.126.0
|
||||
command:
|
||||
- -httpListenAddr=0.0.0.0:8429
|
||||
|
||||
vmstorage:
|
||||
image: victoriametrics/vmstorage:v1.125.1-cluster
|
||||
image: victoriametrics/vmstorage:v1.126.0-cluster
|
||||
|
||||
vminsert:
|
||||
image: victoriametrics/vminsert:v1.125.1-cluster
|
||||
image: victoriametrics/vminsert:v1.126.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8400
|
||||
- -httpListenAddr=0.0.0.0:8480
|
||||
|
||||
vmselect:
|
||||
image: victoriametrics/vmselect:v1.125.1-cluster
|
||||
image: victoriametrics/vmselect:v1.126.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8401
|
||||
- -httpListenAddr=0.0.0.0:8481
|
||||
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.125.1
|
||||
image: victoriametrics/vmagent:v1.126.0
|
||||
volumes:
|
||||
- ./scrape.yaml:/etc/vmagent/config.yaml
|
||||
command:
|
||||
@@ -278,7 +278,7 @@ services:
|
||||
- -remoteWrite.url=http://vmsingle:8429/api/v1/write
|
||||
|
||||
vmgateway-cluster:
|
||||
image: victoriametrics/vmgateway:v1.125.1-enterprise
|
||||
image: victoriametrics/vmgateway:v1.126.0-enterprise
|
||||
ports:
|
||||
- 8431:8431
|
||||
volumes:
|
||||
@@ -294,7 +294,7 @@ services:
|
||||
- -auth.oidcDiscoveryEndpoints=http://keycloak:8080/realms/master/.well-known/openid-configuration
|
||||
|
||||
vmgateway-single:
|
||||
image: victoriametrics/vmgateway:v1.125.1-enterprise
|
||||
image: victoriametrics/vmgateway:v1.126.0-enterprise
|
||||
ports:
|
||||
- 8432:8431
|
||||
volumes:
|
||||
@@ -405,7 +405,7 @@ Once iDP configuration is done, vmagent configuration needs to be updated to use
|
||||
|
||||
```yaml
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.125.1
|
||||
image: victoriametrics/vmagent:v1.126.0
|
||||
volumes:
|
||||
- ./scrape.yaml:/etc/vmagent/config.yaml
|
||||
- ./vmagent-client-secret:/etc/vmagent/oauth2-client-secret
|
||||
|
||||
@@ -125,7 +125,7 @@ As a reference, see resource consumption of VictoriaMetrics cluster on our [play
|
||||
The Retention Period is the number of days or months for storing data. It affects the disk space usage.
|
||||
The formula for calculating required disk space is the following:
|
||||
```
|
||||
Bytes Per Sample * Ingestion rate * Replication Factor * (Retention Period in Seconds +1 Retention Cycle(day or month)) * 1.2 (recommended 20% of dree space for merges )
|
||||
Bytes Per Sample * Ingestion rate * Replication Factor * (Retention Period in Seconds +1 Retention Cycle(day or month)) * 1.2 (recommended 20% of free space for merges )
|
||||
```
|
||||
|
||||
The **Retention Cycle** is one **day** or one **month**. If the retention period is higher than 30 days cycle is a month; otherwise day.
|
||||
|
||||
@@ -69,7 +69,6 @@ See also [case studies](https://docs.victoriametrics.com/victoriametrics/casestu
|
||||
* [Solving Metrics at scale with VictoriaMetrics](https://www.youtube.com/watch?v=QgLMztnj7-8)
|
||||
* [Monitoring as Code на базе VictoriaMetrics и Grafana](https://habr.com/ru/post/568090/)
|
||||
* [Push Prometheus metrics to VictoriaMetrics or other exporters](https://github.com/gistart/prometheus-push-client)
|
||||
* [Install and configure VictoriaMetrics on Debian](https://www.vultr.com/docs/install-and-configure-victoriametrics-on-debian)
|
||||
* [Superset BI with Victoria Metrics](https://cer6erus.medium.com/superset-bi-with-victoria-metrics-a109d3e91bc6)
|
||||
* [VictoriaMetrics Source Code Analysis - Bloom filter](https://www.sobyte.net/post/2022-05/victoriametrics-bloomfilter/)
|
||||
* [How we tried using VictoriaMetrics and Thanos at the same time](https://medium.com/@uburro/how-we-tried-using-victoriametrics-and-thanos-at-the-same-time-48803d2a638b)
|
||||
|
||||
@@ -77,8 +77,9 @@ Pull requests requirements:
|
||||
1. Avoid modifying code in the `/vendor` folder manually, even when the vendored package originates from the VictoriaMetrics GitHub organization.
|
||||
For instance, VictoriaLogs vendors packages under the `/lib` folder from VictoriaMetrics, and VictoriaTraces vendors the `/lib/logstorage` package from VictoriaLogs.
|
||||
Submit a pull request to the upstream repository first. Afterward, a separate pull request can be opened to update the version of the vendored folder in downstream repository.
|
||||
The update of vendored package can be done with: run `go get` with the **tag** (avoid using the commit hash),
|
||||
and then run `go mod tidy` and `go mod vendor` to update the `go.mod`, `go.sum` and `/vendor`.
|
||||
* For common packages, the vendored package can be updated with this command: `go get <dependency>@vX.Y.Z`.
|
||||
* For VictoriaMetrics packages, use `go get <dependency>@canonical_commit_hash`.
|
||||
Finally, run `go mod tidy` and `go mod vendor` to update `go.mod`, `go.sum`, and `/vendor`.
|
||||
1. Ping reviewers who you think have the best expertise on the matter.
|
||||
|
||||
See good example of a [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6487).
|
||||
|
||||
@@ -251,7 +251,7 @@ If you need multi-AZ setup, then it is recommended running independent clusters
|
||||
into all the cluster - see [these docs](https://docs.victoriametrics.com/victoriametrics/vmagent/#multitenancy) for details.
|
||||
Then additional `vmselect` nodes can be configured for reading the data from multiple clusters according to [these docs](#multi-level-cluster-setup).
|
||||
|
||||
See [victoria-metrics-distributed chart](https://docs.victoriametrics.com/helm/victoriametrics-distributed/) for an example.
|
||||
See [victoria-metrics-distributed chart](https://docs.victoriametrics.com/helm/victoria-metrics-distributed/) for an example.
|
||||
|
||||
## Cluster setup
|
||||
|
||||
@@ -376,8 +376,10 @@ The multi-level cluster setup for `vminsert` nodes has the following shortcoming
|
||||
- Data ingestion speed is limited by the slowest link to AZ.
|
||||
- `vminsert` nodes at top level re-route incoming data to the remaining AZs when some AZs are temporarily unavailable. This results in data gaps at AZs which were temporarily unavailable.
|
||||
|
||||
These issues are addressed by [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) when it runs in [multitenancy mode](https://docs.victoriametrics.com/victoriametrics/vmagent/#multitenancy).
|
||||
`vmagent` buffers data, which must be sent to a particular AZ, when this AZ is temporarily unavailable. The buffer is stored on disk. The buffered data is sent to AZ as soon as it becomes available.
|
||||
These issues are addressed by [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) when it runs in front of AZs
|
||||
to [replicate](https://docs.victoriametrics.com/victoriametrics/vmagent/#replication-and-high-availability) or [shard](https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages)
|
||||
data stream. If AZ is temporarily unavailable, `vmagent` [buffers](https://docs.victoriametrics.com/victoriametrics/vmagent/#on-disk-persistence)
|
||||
its data on-disk (see `--remoteWrite.maxDiskUsagePerURL`) without affecting other destinations. The buffered data is sent to AZ as soon as it becomes available.
|
||||
|
||||
See the [cluster instability troubleshooting guide](https://docs.victoriametrics.com/victoriametrics/troubleshooting/#cluster-instability) for details on diagnosing and mitigating networking problems.
|
||||
|
||||
@@ -961,23 +963,7 @@ to ensure query results consistency, even if storage layer didn't complete dedup
|
||||
|
||||
## Backups
|
||||
|
||||
It is recommended performing periodical backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282)
|
||||
for protecting from user errors such as accidental data deletion.
|
||||
|
||||
The following steps must be performed for each `vmstorage` node for creating a backup:
|
||||
|
||||
1. Create an instant snapshot by navigating to `/snapshot/create` HTTP handler. It will create snapshot and return its name.
|
||||
1. Archive the created snapshot from `<-storageDataPath>/snapshots/<snapshot_name>` folder using [vmbackup](https://docs.victoriametrics.com/victoriametrics/vmbackup/).
|
||||
The archival process doesn't interfere with `vmstorage` work, so it may be performed at any suitable time.
|
||||
1. Delete unused snapshots via `/snapshot/delete?snapshot=<snapshot_name>` or `/snapshot/delete_all` in order to free up occupied storage space.
|
||||
|
||||
There is no need in synchronizing backups among all the `vmstorage` nodes.
|
||||
|
||||
Restoring from backup:
|
||||
|
||||
1. Stop `vmstorage` node with `kill -INT`.
|
||||
1. Restore data from backup using [vmrestore](https://docs.victoriametrics.com/victoriametrics/vmrestore/) into `-storageDataPath` directory.
|
||||
1. Start `vmstorage` node.
|
||||
For backup configuration, please refer to [vmbackup documentation](https://docs.victoriametrics.com/victoriametrics/vmbackup/).
|
||||
|
||||
## Retention filters
|
||||
|
||||
|
||||
@@ -129,6 +129,12 @@ and new data is available for querying via Prometheus as usual.
|
||||
It is recommended using [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) for scraping Prometheus targets
|
||||
and writing data to VictoriaMetrics.
|
||||
|
||||
## How does VictoriaMetrics handle backfilling of old (historical) metrics?
|
||||
|
||||
VictoriaMetrics has no limitation on backfilling of old (historical) or out-of-order metrics while they're within
|
||||
the specified [retention period](https://docs.victoriametrics.com/victoriametrics/#retention).
|
||||
See more about [backfilling](https://docs.victoriametrics.com/victoriametrics/#backfilling).
|
||||
|
||||
## How does VictoriaMetrics compare to other remote storage solutions for Prometheus such as [M3DB](https://github.com/m3db/m3), [Thanos](https://github.com/thanos-io/thanos), [Cortex](https://github.com/cortexproject/cortex), [Mimir](https://github.com/grafana/mimir), etc.?
|
||||
|
||||
* VictoriaMetrics is easier to configure and operate than competing solutions.
|
||||
|
||||
@@ -27,5 +27,5 @@ to [the latest available releases](https://docs.victoriametrics.com/victoriametr
|
||||
|
||||
## Currently supported LTS release lines
|
||||
|
||||
- v1.122.x - the latest one is [v1.122.3 LTS release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.3)
|
||||
- v1.110.x - the latest one is [v1.110.18 LTS release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.110.17)
|
||||
- v1.122.x - the latest one is [v1.122.4 LTS release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.4)
|
||||
- v1.110.x - the latest one is [v1.110.19 LTS release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.110.19)
|
||||
|
||||
@@ -123,7 +123,7 @@ The list of MetricsQL features on top of PromQL:
|
||||
* `if` binary operator. `q1 if q2` removes values from `q1` for missing values from `q2`.
|
||||
* `ifnot` binary operator. `q1 ifnot q2` removes values from `q1` for existing values from `q2`.
|
||||
* `WITH` templates. This feature simplifies writing and managing complex queries.
|
||||
Go to [WITH templates playground](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/expand-with-exprs) and try it.
|
||||
Go to [WITH templates playground](https://play.victoriametrics.com/select/0/prometheus/graph/#/expand-with-exprs) and try it.
|
||||
* String literals may be concatenated. This is useful with `WITH` templates:
|
||||
`WITH (commonPrefix="long_metric_prefix_") {__name__=commonPrefix+"suffix1"} / {__name__=commonPrefix+"suffix2"}`.
|
||||
* `keep_metric_names` modifier can be applied to all the [rollup functions](#rollup-functions), [transform functions](#transform-functions)
|
||||
|
||||
@@ -50,7 +50,7 @@ and performing [regular upgrades](https://docs.victoriametrics.com/victoriametri
|
||||
|
||||
1. Go to [VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_quickstart_guide) and sign up (it's free).
|
||||
1. After signing up, you will be immediately granted $200 of trial credits you can spend on running Single node or Cluster.
|
||||
1. Navigate to the VictoriaMetrics Cloud [quick start](https://docs.victoriametrics.com/victoriametrics-cloud/quickstart/#creating-deployments) guide for detailed instructions.
|
||||
1. Navigate to the VictoriaMetrics Cloud [quick start](https://docs.victoriametrics.com/victoriametrics-cloud/get-started/quickstart/#creating-deployments) guide for detailed instructions.
|
||||
|
||||
### Starting VictoriaMetrics Single Node via Docker {id="starting-vm-single-via-docker"}
|
||||
|
||||
@@ -58,9 +58,9 @@ Download the newest available [VictoriaMetrics release](https://docs.victoriamet
|
||||
from [DockerHub](https://hub.docker.com/r/victoriametrics/victoria-metrics) or [Quay](https://quay.io/repository/victoriametrics/victoria-metrics?tab=tags):
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/victoria-metrics:v1.125.1
|
||||
docker pull victoriametrics/victoria-metrics:v1.126.0
|
||||
docker run -it --rm -v `pwd`/victoria-metrics-data:/victoria-metrics-data -p 8428:8428 \
|
||||
victoriametrics/victoria-metrics:v1.125.1 --selfScrapeInterval=5s -storageDataPath=victoria-metrics-data
|
||||
victoriametrics/victoria-metrics:v1.126.0 --selfScrapeInterval=5s -storageDataPath=victoria-metrics-data
|
||||
```
|
||||
|
||||
_For Enterprise images see [this link](https://docs.victoriametrics.com/victoriametrics/enterprise/#docker-images)._
|
||||
@@ -452,7 +452,8 @@ All of them are present in [Grafana dashboards](https://grafana.com/orgs/victori
|
||||
|
||||
It is recommended to read [Replication and data safety](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#replication-and-data-safety),
|
||||
[Why replication doesn’t save from disaster?](https://valyala.medium.com/speeding-up-backups-for-big-time-series-databases-533c1a927883)
|
||||
and [backups](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#backups).
|
||||
|
||||
For backup configuration, please refer to [vmbackup documentation](https://docs.victoriametrics.com/victoriametrics/vmbackup/).
|
||||
|
||||
### Configuring limits
|
||||
|
||||
|
||||
@@ -80,7 +80,7 @@ VictoriaMetrics has the following prominent features:
|
||||
* [Prometheus exposition format](#how-to-import-data-in-prometheus-exposition-format).
|
||||
* [InfluxDB line protocol](https://docs.victoriametrics.com/victoriametrics/integrations/influxdb/) over HTTP, TCP and UDP.
|
||||
* [Graphite plaintext protocol](https://docs.victoriametrics.com/victoriametrics/integrations/graphite/#ingesting) with [tags](https://graphite.readthedocs.io/en/latest/tags.html#carbon).
|
||||
* [OpenTSDB put message](#sending-data-via-telnet-put-protocol).
|
||||
* [OpenTSDB put message](https://docs.victoriametrics.com/victoriametrics/integrations/opentsdb/#sending-data-via-telnet).
|
||||
* [HTTP OpenTSDB /api/put requests](https://docs.victoriametrics.com/victoriametrics/integrations/opentsdb/#sending-data-via-http).
|
||||
* [JSON line format](#how-to-import-data-in-json-line-format).
|
||||
* [Arbitrary CSV data](#how-to-import-csv-data).
|
||||
@@ -630,6 +630,8 @@ Note that `production` builds are not supported via Podman because Podman does n
|
||||
|
||||
## How to work with snapshots
|
||||
|
||||
### Create snapshot
|
||||
|
||||
Send a request to `http://<victoriametrics-addr>:8428/snapshot/create` endpoint in order to create
|
||||
an [instant snapshot](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
|
||||
The page returns the following JSON response on successful creation of snapshot:
|
||||
@@ -655,6 +657,8 @@ See also [snapshot troubleshooting](#snapshot-troubleshooting).
|
||||
|
||||
The `http://<victoriametrics-addr>:8428/snapshot/list` endpoint returns the list of available snapshots.
|
||||
|
||||
### Delete snapshot
|
||||
|
||||
Send a query to `http://<victoriametrics-addr>:8428/snapshot/delete?snapshot=<snapshot-name>` in order
|
||||
to delete the snapshot with `<snapshot-name>` name.
|
||||
|
||||
@@ -855,7 +859,7 @@ Additionally, VictoriaMetrics can accept metrics via the following popular data
|
||||
* InfluxDB line protocol. See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/influxdb/#influxdb-compatible-agents-such-as-telegraf) for details.
|
||||
* Graphite plaintext protocol. See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/graphite/#ingesting) for details.
|
||||
* OpenTelemetry http API. See [these docs](#sending-data-via-opentelemetry) for details.
|
||||
* OpenTSDB telnet put protocol. See [these docs](#sending-data-via-telnet-put-protocol) for details.
|
||||
* OpenTSDB telnet put protocol. See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/opentsdb/#sending-data-via-telnet) for details.
|
||||
* OpenTSDB http `/api/put` protocol. See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/opentsdb/#sending-data-via-http) for details.
|
||||
* `/api/v1/import` for importing data obtained from [/api/v1/export](#how-to-export-data-in-json-line-format).
|
||||
See [these docs](#how-to-import-data-in-json-line-format) for details.
|
||||
@@ -1328,7 +1332,7 @@ since it uses lower amounts of RAM, CPU and network bandwidth than Prometheus.
|
||||
If you use identically configured [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) instances for collecting the same data
|
||||
and sending it to VictoriaMetrics, then do not forget enabling [deduplication](#deduplication) at VictoriaMetrics side.
|
||||
|
||||
See [victoria-metrics-distributed chart](https://docs.victoriametrics.com/helm/victoriametrics-distributed/) for an example.
|
||||
See [victoria-metrics-distributed chart](https://docs.victoriametrics.com/helm/victoria-metrics-distributed/) for an example.
|
||||
|
||||
## Deduplication
|
||||
|
||||
@@ -2221,9 +2225,10 @@ Use [vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/) to migrate
|
||||
|
||||
## Backfilling
|
||||
|
||||
VictoriaMetrics accepts historical data in arbitrary order of time via [any supported ingestion method](#how-to-import-time-series-data).
|
||||
See [how to backfill data with recording rules in vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/#rules-backfilling).
|
||||
Make sure that configured `-retentionPeriod` covers timestamps for the backfilled data.
|
||||
VictoriaMetrics accepts out-of-order historical data via [any supported ingestion method](#how-to-import-time-series-data)
|
||||
without limitations. Only make sure that backfilled data is within the configured [retention period](https://docs.victoriametrics.com/victoriametrics/#retention).
|
||||
|
||||
> See [how to backfill recording rules via vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/#rules-backfilling).
|
||||
|
||||
It is recommended disabling [query cache](#rollup-result-cache) with `-search.disableCache` command-line flag when writing
|
||||
historical data with timestamps from the past, since the cache assumes that the data is written with
|
||||
@@ -2232,7 +2237,7 @@ the current timestamps. Query cache can be enabled after the backfilling is comp
|
||||
An alternative solution is to query [/internal/resetRollupResultCache](https://docs.victoriametrics.com/victoriametrics/url-examples/#internalresetrollupresultcache)
|
||||
after the backfilling is complete. This will reset the [query cache](#rollup-result-cache), which could contain incomplete data cached during the backfilling.
|
||||
|
||||
Yet another solution is to increase `-search.cacheTimestampOffset` flag value in order to disable caching
|
||||
Yet another solution is to increase `-search.cacheTimestampOffset` flag value to disable caching
|
||||
for data with timestamps close to the current time. Single-node VictoriaMetrics automatically resets response
|
||||
cache when samples with timestamps older than `now - search.cacheTimestampOffset` are ingested to it.
|
||||
|
||||
@@ -2254,11 +2259,8 @@ See also [high availability docs](#high-availability) and [backup docs](#backups
|
||||
|
||||
## Backups
|
||||
|
||||
VictoriaMetrics supports backups via [vmbackup](https://docs.victoriametrics.com/victoriametrics/vmbackup/)
|
||||
and [vmrestore](https://docs.victoriametrics.com/victoriametrics/vmrestore/) tools.
|
||||
We also provide [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/) tool for enterprise subscribers.
|
||||
Enterprise binaries can be downloaded and evaluated for free from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
|
||||
See how to request a [free trial license](https://victoriametrics.com/products/enterprise/trial/).
|
||||
For backup configuration and setup, please refer to [vmbackup documentation](https://docs.victoriametrics.com/victoriametrics/vmbackup/).
|
||||
|
||||
|
||||
## vmalert
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ aliases:
|
||||
- /troubleshooting/index.html
|
||||
- /troubleshooting/
|
||||
---
|
||||
This document contains troubleshooting guides for most common issues when working with VictoriaMetrics:
|
||||
This document contains troubleshooting guides for the most common issues when working with VictoriaMetrics:
|
||||
|
||||
- [General troubleshooting checklist](#general-troubleshooting-checklist)
|
||||
- [Unexpected query results](#unexpected-query-results)
|
||||
@@ -26,9 +26,9 @@ This document contains troubleshooting guides for most common issues when workin
|
||||
## General troubleshooting checklist
|
||||
|
||||
If you hit some issue or have some question about VictoriaMetrics components,
|
||||
then please follow the following steps in order to quickly find the solution:
|
||||
then please follow these steps in order to quickly find the solution:
|
||||
|
||||
1. Check the version of VictoriaMetrics component, which needs to be troubleshot and compare
|
||||
1. Check the version of the VictoriaMetrics component you are troubleshooting and compare
|
||||
it to [the latest available version](https://docs.victoriametrics.com/victoriametrics/changelog/).
|
||||
If the used version is lower than the latest available version, then there are high chances
|
||||
that the issue is already resolved in newer versions. Carefully read [the changelog](https://docs.victoriametrics.com/victoriametrics/changelog/)
|
||||
@@ -46,9 +46,9 @@ then please follow the following steps in order to quickly find the solution:
|
||||
These cases are documented in [the changelog](https://docs.victoriametrics.com/victoriametrics/changelog/).
|
||||
So please read the changelog before the upgrade.
|
||||
|
||||
1. Inspect command-line flags passed to VictoriaMetrics components and remove flags which unclear outcomes for your workload.
|
||||
1. Inspect command-line flags passed to VictoriaMetrics components and remove flags that have unclear outcomes for your workload.
|
||||
VictoriaMetrics components are designed to work optimally with the default command-line flag values (e.g. when these flags aren't set explicitly).
|
||||
It is recommended removing flags with unclear outcomes, since they may result in unexpected issues.
|
||||
It is recommended to remove flags with unclear outcomes, since they may result in unexpected issues.
|
||||
|
||||
1. Check for logs in VictoriaMetrics components. They may contain useful information about cause of the issue
|
||||
and how to fix the issue. If the log message doesn't have enough useful information for troubleshooting,
|
||||
@@ -103,7 +103,6 @@ then please follow the following steps in order to quickly find the solution:
|
||||
1. Pro tip 4: if you can fix the issue on your own, then please do it and provide the corresponding pull request!
|
||||
We are glad to get pull requests from VictoriaMetrics community.
|
||||
|
||||
|
||||
## Unexpected query results
|
||||
|
||||
If you see unexpected or unreliable query results from VictoriaMetrics, then try the following steps:
|
||||
@@ -114,8 +113,8 @@ If you see unexpected or unreliable query results from VictoriaMetrics, then try
|
||||
|
||||
- Remove the outer `sum` and execute `rate(http_requests_total[5m])`,
|
||||
since aggregations could hide some missing series, gaps in data or anomalies in existing series.
|
||||
If this query returns too many time series, then try adding more specific label filters to it.
|
||||
For example, if you see that the original query returns unexpected results for the `job="foo"`,
|
||||
If this query returns too many time series, then try adding more specific label filters to it.
|
||||
For example, if you see that the original query returns unexpected results for the `job="foo"`,
|
||||
then use `rate(http_requests_total{job="foo"}[5m])` query.
|
||||
If this isn't enough, then continue adding more specific label filters, so the resulting query returns
|
||||
manageable number of time series.
|
||||
@@ -134,9 +133,10 @@ If you see unexpected or unreliable query results from VictoriaMetrics, then try
|
||||
|
||||
```sh
|
||||
single-node: curl http://victoriametrics:8428/api/v1/export -d 'match[]=http_requests_total' -d 'start=...' -d 'end=...' -d 'reduce_mem_usage=1'
|
||||
|
||||
|
||||
cluster: curl http://<vmselect>:8481/select/<tenantID>/prometheus/api/v1/export -d 'match[]=http_requests_total' -d 'start=...' -d 'end=...' -d 'reduce_mem_usage=1'
|
||||
```
|
||||
|
||||
Note that responses returned from [/api/v1/query](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#instant-query)
|
||||
and from [/api/v1/query_range](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#range-query) contain **evaluated** data
|
||||
instead of raw samples stored in VictoriaMetrics. See [these docs](https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness)
|
||||
@@ -155,7 +155,7 @@ If you see unexpected or unreliable query results from VictoriaMetrics, then try
|
||||
- By passing `nocache=1` query arg to every request to `/api/v1/query` and `/api/v1/query_range`.
|
||||
If you use Grafana, then this query arg can be specified in `Custom Query Parameters` field
|
||||
at Prometheus datasource settings - see [these docs](https://grafana.com/docs/grafana/latest/datasources/prometheus/) for details.
|
||||
|
||||
|
||||
If the problem was in the cache, try resetting it via [resetRollupCache handler](https://docs.victoriametrics.com/victoriametrics/url-examples/#internalresetrollupresultcache).
|
||||
|
||||
1. If you use cluster version of VictoriaMetrics, then it may return partial responses by default
|
||||
@@ -176,17 +176,17 @@ If you see unexpected or unreliable query results from VictoriaMetrics, then try
|
||||
or targets unavailability on scrapes, irregular pushes, irregular timestamps).
|
||||
VictoriaMetrics automatically [fills the gaps](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#range-query)
|
||||
based on median interval between [data samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples).
|
||||
This might work incorrect for irregular data as median will be skewed. In this case it is recommended to switch
|
||||
to the static interval for gaps filling by setting `-search.minStalenessInterval=5m` cmd-line flag (`5m` is
|
||||
This may work incorrectly for irregular data as median will be skewed. In this case it is recommended to switch
|
||||
to the static interval for gaps filling by setting `-search.minStalenessInterval=5m` command-line flag (`5m` is
|
||||
the static interval used by Prometheus).
|
||||
|
||||
1. If you observe recently written data is not immediately visible/queryable, then read more about
|
||||
1. If you observe recently written data is not immediately visible/queryable, then read more about
|
||||
[query latency](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#query-latency) behavior.
|
||||
|
||||
1. Try upgrading to the [latest available version of VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest)
|
||||
and verifying whether the issue is fixed there.
|
||||
|
||||
1. Try executing the query with `trace=1` query arg. This enables query tracing, which may contain
|
||||
1. Try executing the query with `trace=1` query arg. This enables query tracing, which may contain
|
||||
useful information on why the query returns unexpected data. See [query tracing docs](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#query-tracing) for details.
|
||||
|
||||
1. Inspect command-line flags passed to VictoriaMetrics components. If you don't understand clearly the purpose
|
||||
@@ -196,13 +196,12 @@ If you see unexpected or unreliable query results from VictoriaMetrics, then try
|
||||
|
||||
1. If the steps above didn't help identify the root cause of unexpected query results,
|
||||
then [file a bugreport](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/new) with details on how to reproduce the issue.
|
||||
Instead of sharing screenshots in the issue, consider sharing query and [trace](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#query-tracing)
|
||||
Instead of sharing screenshots in the issue, consider sharing query and [trace](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#query-tracing)
|
||||
results in [VMUI](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui) by clicking on `Export query` button in top right corner of the graph area.
|
||||
|
||||
|
||||
## Slow data ingestion
|
||||
|
||||
There are the following most commons reasons for slow data ingestion in VictoriaMetrics:
|
||||
These are the most common reasons for slow data ingestion in VictoriaMetrics:
|
||||
|
||||
1. Memory shortage for the given amounts of [active time series](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-an-active-time-series).
|
||||
|
||||
@@ -214,12 +213,12 @@ There are the following most commons reasons for slow data ingestion in Victoria
|
||||
unpacks it, re-constructs the missing entry and puts it into the cache. This takes additional CPU time and disk read IO.
|
||||
|
||||
The [official Grafana dashboards for VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#monitoring)
|
||||
contain `Slow inserts` graph, which shows the cache miss percentage for `storage/tsid` cache
|
||||
contain `Slow inserts` graph, that shows the cache miss percentage for `storage/tsid` cache
|
||||
during data ingestion. If `slow inserts` graph shows values greater than 5% for more than 10 minutes,
|
||||
then it is likely the current number of [active time series](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-an-active-time-series)
|
||||
cannot fit the `storage/tsid` cache.
|
||||
|
||||
There are the following solutions exist for this issue:
|
||||
These are the solutions that exist for this issue:
|
||||
|
||||
- To increase the available memory on the host where VictoriaMetrics runs until `slow inserts` percentage
|
||||
becomes lower than 5%. If you run VictoriaMetrics cluster, then you need to increase total available
|
||||
@@ -229,7 +228,7 @@ There are the following most commons reasons for slow data ingestion in Victoria
|
||||
- To reduce the number of active time series. The [official Grafana dashboards for VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#monitoring)
|
||||
contain a graph showing the number of active time series. Recent versions of VictoriaMetrics
|
||||
provide [cardinality explorer](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cardinality-explorer),
|
||||
which can help determining and fixing the source of [high cardinality](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-high-cardinality).
|
||||
that can help determining and fixing the source of [high cardinality](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-high-cardinality).
|
||||
|
||||
1. [High churn rate](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-high-churn-rate),
|
||||
e.g. when old time series are substituted with new time series at a high rate.
|
||||
@@ -240,15 +239,15 @@ There are the following most commons reasons for slow data ingestion in Victoria
|
||||
So VictoriaMetrics may work slower than expected under [high churn rate](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-high-churn-rate).
|
||||
|
||||
The [official Grafana dashboards for VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#monitoring)
|
||||
provides `Churn rate` graph, which shows the average number of new time series registered
|
||||
provide a `Churn rate` graph that shows the average number of new time series registered
|
||||
during the last 24 hours. If this number exceeds the number of [active time series](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-an-active-time-series),
|
||||
then you need to identify and fix the source of [high churn rate](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-high-churn-rate).
|
||||
The most commons source of high churn rate is a label, which frequently changes its value. Try avoiding such labels.
|
||||
The most common source of high churn rate is a label that frequently changes its value. Try avoiding such labels.
|
||||
The [cardinality explorer](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cardinality-explorer) can help identifying
|
||||
such labels.
|
||||
|
||||
1. Resource shortage. The [official Grafana dashboards for VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#monitoring)
|
||||
contain `resource usage` graphs, which show memory usage, CPU usage, disk IO usage and free disk size.
|
||||
contain `resource usage` graphs, that show memory usage, CPU usage, disk IO usage and free disk size.
|
||||
Make sure VictoriaMetrics has enough free resources for graceful handling of potential spikes in workload
|
||||
according to the following recommendations:
|
||||
|
||||
@@ -265,7 +264,7 @@ There are the following most commons reasons for slow data ingestion in Victoria
|
||||
with slightly increased data ingestion rate.
|
||||
|
||||
- If the percentage of free memory reaches 0, then the Operating System where VictoriaMetrics components run,
|
||||
may have no enough memory for [page cache](https://en.wikipedia.org/wiki/Page_cache).
|
||||
may not have enough memory for [page cache](https://en.wikipedia.org/wiki/Page_cache).
|
||||
VictoriaMetrics relies on page cache for quick queries over recently ingested data.
|
||||
If the operating system does not have enough free memory for page cache, then it needs
|
||||
to re-read the requested data from disk. This may **significantly** increase disk read IO
|
||||
@@ -273,11 +272,11 @@ There are the following most commons reasons for slow data ingestion in Victoria
|
||||
|
||||
- If free disk space is lower than 20%, then VictoriaMetrics is unable to perform optimal
|
||||
background merge of the incoming data. This leads to increased number of data files on disk,
|
||||
which, in turn, slows down both data ingestion and querying. See [these docs](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#storage) for details.
|
||||
that, in turn, slows down both data ingestion and querying. See [these docs](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#storage) for details.
|
||||
|
||||
1. If you run cluster version of VictoriaMetrics, then make sure `vminsert` and `vmstorage` components
|
||||
are located in the same network with small network latency between them.
|
||||
`vminsert` packs incoming data into batch packets and sends them to `vmstorage` on-by-one.
|
||||
`vminsert` packs incoming data into batch packets and sends them to `vmstorage` one-by-one.
|
||||
It waits until `vmstorage` returns back `ack` response before sending the next packet.
|
||||
If the network latency between `vminsert` and `vmstorage` is high (for example, if they run in different datacenters),
|
||||
then this may become limiting factor for data ingestion speed.
|
||||
@@ -289,14 +288,14 @@ There are the following most commons reasons for slow data ingestion in Victoria
|
||||
is resource shortage at `vmstorage` nodes. In this case you need to increase amounts
|
||||
of available resources (CPU, RAM, disk IO) at `vmstorage` nodes or to add more `vmstorage` nodes to the cluster.
|
||||
|
||||
1. Noisy neighbor. Make sure VictoriaMetrics components run in an environments without other resource-hungry apps.
|
||||
Such apps may steal RAM, CPU, disk IO and network bandwidth, which is needed for VictoriaMetrics components.
|
||||
1. Noisy neighbor. Make sure VictoriaMetrics components run in an environment without other resource-hungry apps.
|
||||
Such apps may steal RAM, CPU, disk IO and network bandwidth, that is needed for VictoriaMetrics components.
|
||||
Issues like this are very hard to catch via [official Grafana dashboard for cluster version of VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#monitoring)
|
||||
and proper diagnosis would require checking resource usage on the instances where VictoriaMetrics runs.
|
||||
|
||||
1. If you see `TooHighSlowInsertsRate` [alert](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#monitoring) when single-node VictoriaMetrics or `vmstorage` has enough
|
||||
free CPU and RAM, then increase `-cacheExpireDuration` command-line flag at single-node VictoriaMetrics or at `vmstorage` to the value,
|
||||
which exceeds the interval between ingested samples for the same time series (aka `scrape_interval`).
|
||||
that exceeds the interval between ingested samples for the same time series (aka `scrape_interval`).
|
||||
See [this comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183) for more details.
|
||||
|
||||
1. If you see constant and abnormally high CPU usage of VictoriaMetrics component, check `CPU spent on GC` panel
|
||||
@@ -311,15 +310,15 @@ Some queries may take more time and resources (CPU, RAM, network bandwidth) than
|
||||
VictoriaMetrics logs slow queries if their execution time exceeds the duration passed
|
||||
to `-search.logSlowQueryDuration` command-line flag (5s by default).
|
||||
|
||||
VictoriaMetrics provides [`top queries` page at VMUI](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#top-queries), which shows
|
||||
VictoriaMetrics provides [`top queries` page at VMUI](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#top-queries), that shows
|
||||
queries that took the most time to execute.
|
||||
|
||||
There are the following solutions exist for improving performance of slow queries:
|
||||
These are the solutions that exist for improving performance of slow queries:
|
||||
|
||||
- Adding more CPU and memory to VictoriaMetrics, so it may perform the slow query faster.
|
||||
If you use cluster version of VictoriaMetrics, then migrating `vmselect` nodes to machines
|
||||
with more CPU and RAM should help improving speed for slow queries. Query performance
|
||||
is always limited by resources of one `vmselect` which processes the query. For example, if 2vCPU cores on `vmselect`
|
||||
is always limited by resources of one `vmselect` that processes the query. For example, if 2vCPU cores on `vmselect`
|
||||
isn't enough to process query fast enough, then migrating `vmselect` to a machine with 4vCPU cores should increase heavy query performance by up to 2x.
|
||||
If the line on `concurrent select` graph from the [official Grafana dashboard for VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#monitoring)
|
||||
is close to the limit, then prefer adding more `vmselect` nodes to the cluster.
|
||||
@@ -361,10 +360,9 @@ There are the following solutions exist for improving performance of slow querie
|
||||
See [this article](https://www.robustperception.io/rate-then-sum-never-sum-then-rate/) for more details.
|
||||
|
||||
VictoriaMetrics provides [query tracing](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#query-tracing) feature,
|
||||
which can help determining the source of slow query.
|
||||
that can help determining the source of slow query.
|
||||
See also [this article](https://valyala.medium.com/how-to-optimize-promql-and-metricsql-queries-85a1b75bf986),
|
||||
which explains how to determine and optimize slow queries.
|
||||
|
||||
that explains how to determine and optimize slow queries.
|
||||
|
||||
## Out of memory errors
|
||||
|
||||
@@ -377,16 +375,16 @@ There are the following most common sources of out of memory (aka OOM) crashes i
|
||||
VictoriaMetrics is optimized for running with default flag values (e.g. when they aren't set explicitly).
|
||||
|
||||
For example, it isn't recommended tuning cache sizes in VictoriaMetrics, since it frequently leads to OOM exceptions.
|
||||
[These docs](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cache-tuning) refer command-line flags, which aren't
|
||||
[These docs](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cache-tuning) refer to command-line flags that aren't
|
||||
recommended to tune. If you see that VictoriaMetrics needs increasing some cache sizes for the current workload,
|
||||
then it is better migrating to a host with more memory instead of trying to tune cache sizes manually.
|
||||
|
||||
1. Unexpected heavy queries. The query is considered as heavy if it needs to select and process millions of unique time series.
|
||||
Such query may lead to OOM exception, since VictoriaMetrics needs to keep some of per-series data in memory.
|
||||
VictoriaMetrics provides [various settings](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#resource-usage-limits),
|
||||
which can help limit resource usage.
|
||||
VictoriaMetrics provides [various settings](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#resource-usage-limits),
|
||||
that can help limit resource usage.
|
||||
For more context, see [How to optimize PromQL and MetricsQL queries](https://valyala.medium.com/how-to-optimize-promql-and-metricsql-queries-85a1b75bf986).
|
||||
VictoriaMetrics also provides [query tracer](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#query-tracing)
|
||||
VictoriaMetrics also provides [query tracer](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#query-tracing)
|
||||
to help identify the source of heavy query.
|
||||
|
||||
1. Lack of free memory for processing workload spikes. If VictoriaMetrics components use almost all the available memory
|
||||
@@ -396,7 +394,6 @@ There are the following most common sources of out of memory (aka OOM) crashes i
|
||||
See [capacity planning for single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#capacity-planning)
|
||||
and [capacity planning for cluster version of VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#capacity-planning).
|
||||
|
||||
|
||||
## Cluster instability
|
||||
|
||||
VictoriaMetrics cluster may become unstable if there are not enough free resources (CPU, RAM, disk IO, network bandwidth)
|
||||
@@ -407,7 +404,7 @@ The most common sources of cluster instability are:
|
||||
- Workload spikes. For example, if the number of active time series increases by 2x while
|
||||
the cluster does not have enough free resources for processing the increased workload,
|
||||
then it may become unstable.
|
||||
VictoriaMetrics provides various configuration settings, which can be used for limiting unexpected workload spikes.
|
||||
VictoriaMetrics provides various configuration settings, that can be used for limiting unexpected workload spikes.
|
||||
See [these docs](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#resource-usage-limits) for details.
|
||||
|
||||
- Various maintenance tasks such as rolling upgrades or rolling restarts during configuration changes.
|
||||
@@ -416,7 +413,7 @@ The most common sources of cluster instability are:
|
||||
This means that the load on healthy `vmstorage` nodes increases by at least `100%/(N-1)=50%`
|
||||
compared to the load before the rolling restart. E.g. they need to process 50% more incoming
|
||||
data and to return 50% more data during queries. In reality, the load on the remaining `vmstorage`
|
||||
nodes increases even more because they need to register new time series, which were re-routed
|
||||
nodes increases even more because they need to register new time series, that were re-routed
|
||||
from temporarily unavailable `vmstorage` node. If `vmstorage` nodes had less than 50%
|
||||
of free resources (CPU, RAM, disk IO) before the rolling restart, then it
|
||||
can lead to cluster overload and instability for both data ingestion and querying.
|
||||
@@ -434,21 +431,21 @@ The most common sources of cluster instability are:
|
||||
respecting their order. If the order of labels in time series is constantly changing, this could cause wrong sharding
|
||||
calculation and result in un-even and sub-optimal time series distribution across available vmstorages. It is expected
|
||||
that metrics pushing client is responsible for consistent labels order (like `Prometheus` or `vmagent` during scraping).
|
||||
If this can't be guaranteed, set `-sortLabels=true` cmd-line flag to `vminsert`. Please note, sorting may increase
|
||||
If this can't be guaranteed, set `-sortLabels=true` command-line flag to `vminsert`. Please note, sorting may increase
|
||||
CPU usage for `vminsert`.
|
||||
|
||||
- Network instability between cluster components (`vminsert`, `vmselect`, `vmstorage`) may lead to increased error rates, timeouts, or degraded performance.
|
||||
Check resource usage graphs for all components on [the official Grafana dashboard for VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#monitoring).
|
||||
- Network instability between cluster components (`vminsert`, `vmselect`, `vmstorage`) may lead to increased error rates, timeouts, or degraded performance.
|
||||
Check resource usage graphs for all components on [the official Grafana dashboard for VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#monitoring).
|
||||
If the graphs show high CPU usage, then the cluster is likely overloaded and requires more resources.
|
||||
Note that short-lived 100% CPU spikes may not be visible in metrics with typical 10–30s scrape intervals,
|
||||
but can still cause transient network failures. In such cases, check CPU usage at the OS level with higher-resolution tools.
|
||||
Note that short-lived 100% CPU spikes may not be visible in metrics with typical 10–30s scrape intervals,
|
||||
but can still cause transient network failures. In such cases, check CPU usage at the OS level with higher-resolution tools.
|
||||
Consider increasing `-vmstorageDialTimeout` and `-rpc.handshakeTimeout`{{% available_from "v1.124.0" %}} to mitigate the effects of CPU spikes.
|
||||
|
||||
|
||||
If resource usage looks normal but networking issues still occur, then the root cause is likely outside VictoriaMetrics.
|
||||
This may be caused by unreliable or congested network links, especially across availability zones or regions.
|
||||
In multi-AZ setups, consider [a multi-level cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multi-level-cluster-setup) with region-local load balancers to reduce cross-zone connections.
|
||||
This may be caused by unreliable or congested network links, especially across availability zones or regions.
|
||||
In multi-AZ setups, consider [a multi-level cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multi-level-cluster-setup) with region-local load balancers to reduce cross-zone connections.
|
||||
If the network cannot be improved, increasing timeouts such as `-vmstorageDialTimeout`, `-rpc.handshakeTimeout`{{% available_from "v1.124.0" %}}, or `-search.maxQueueDuration` may help, but should be done cautiously, as higher timeouts can impact cluster stability in other ways.
|
||||
Keep in mind that VictoriaMetrics assumes reliable networking between components. If the network is unstable, the overall cluster stability may degrade regardless of resource availability.
|
||||
Keep in mind that VictoriaMetrics assumes reliable networking between components. If the network is unstable, the overall cluster stability may degrade regardless of resource availability.
|
||||
|
||||
The obvious solution against VictoriaMetrics cluster instability is to make sure cluster components
|
||||
have enough free resources for graceful processing of the increased workload.
|
||||
@@ -456,7 +453,6 @@ See [capacity planning docs](https://docs.victoriametrics.com/victoriametrics/cl
|
||||
and [cluster resizing and scalability docs](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#cluster-resizing-and-scalability)
|
||||
for details.
|
||||
|
||||
|
||||
## Too much disk space used
|
||||
|
||||
If too much disk space is used by a [single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) or by `vmstorage` component
|
||||
@@ -467,15 +463,16 @@ at [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cl
|
||||
|
||||
- Under normal conditions the size of `<-storageDataPath>/indexdb` folder must be smaller than the size of `<-storageDataPath>/data` folder, where `-storageDataPath`
|
||||
is the corresponding command-line flag value. This can be checked by the following query if [VictoriaMetrics monitoring](#monitoring) is properly set up:
|
||||
|
||||
```metricsql
|
||||
sum(vm_data_size_bytes{type=~"indexdb/.+"}) without(type)
|
||||
/
|
||||
sum(vm_data_size_bytes{type=~"(storage|indexdb)/.+"}) without(type)
|
||||
```
|
||||
If this query returns values bigger than 0.5, then it is likely there is a [high churn rate](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-high-churn-rate) issue,
|
||||
which results in excess disk space usage for both `indexdb` and `data` folders under `-storageDataPath` folder.
|
||||
The solution is to identify and fix the source of high churn rate with [cardinality explorer](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cardinality-explorer).
|
||||
|
||||
If this query returns values bigger than 0.5, then it is likely there is a [high churn rate](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-high-churn-rate) issue,
|
||||
that results in excess disk space usage for both `indexdb` and `data` folders under `-storageDataPath` folder.
|
||||
The solution is to identify and fix the source of high churn rate with [cardinality explorer](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cardinality-explorer).
|
||||
|
||||
## Monitoring
|
||||
|
||||
|
||||
@@ -26,6 +26,17 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
|
||||
|
||||
## tip
|
||||
|
||||
* SECURITY: upgrade Go builder from Go1.25.0 to Go1.25.1. See [the list of issues addressed in Go1.25.1](https://github.com/golang/go/issues?q=milestone%3AGo1.25.1%20label%3ACherryPickApproved).
|
||||
|
||||
* FEATURE: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): add `-rule.resultsLimit` command-line flag to allow limiting the number of alerts or recording results a single rule can produce. See [#5792](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5792).
|
||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): stream responses from backends to clients without delays. Previously the backend data could be buffered on the `vmauth` side for an indefinite amount of time. This prevented `vmauth` from being used for streaming the data from backends in [live tailing mode](https://docs.victoriametrics.com/victorialogs/querying/#live-tailing). See [VictoriaLogs#667](https://github.com/VictoriaMetrics/VictoriaLogs/issues/667).
|
||||
* FEATURE: [vmbackup](https://docs.victoriametrics.com/victoriametrics/vmbackup/), [vmrestore](https://docs.victoriametrics.com/victoriametrics/vmrestore/): push metrics to the configured `-pushmetrics.url` on shutdown. Previously, if `-pushmetrics.url` was configured, vmbackup or vmrestore could skip reporting their metrics before shutdown.
|
||||
* FEATURE: [dashboards/operator](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/dashboards/operator.json), [dashboards/query-stats](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/dashboards/query-stats.json): add ad-hoc filters.
|
||||
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): remove the error log when marshaling an invalid comment or an empty HELP metadata line during scraping, if [metadata processing](https://docs.victoriametrics.com/victoriametrics/vmagent/#metric-metadata) is enabled. See [#9710](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9710).
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): prevent unexpected performance degradation caused by cache misses (exposed via `vm_cache_misses_total` metric) during rotation. See this PR [#9769](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9769) for details.
|
||||
* BUGFIX: all components: restore sorting order of summary and quantile metrics exposed by VictoriaMetrics components on `/metrics` page. See [metrics#105](https://github.com/VictoriaMetrics/metrics/pull/105) for details.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): avoid applying offset modifier twice to the request time when an instant query uses rollup functions `rate()` or `avg_over_time()` with cache enabled. See [#9762](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9762).
|
||||
|
||||
## [v1.126.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.126.0)
|
||||
|
||||
@@ -39,7 +50,7 @@ Released at 2025-09-12
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): fix possible partial rule update responses in group-related APIs during group updates in hot config reload. See [#9551](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9551).
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): properly apply rollup functions to metrics based on their name in vmui's [metrics explorer](https://docs.victoriametrics.com/victoriametrics/#metrics-explorer). See [#9655](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9655) for details. Thanks to @wbwren-eric for the fix.
|
||||
* BUGFIX: all VictoriaMetrics [enterprise](https://docs.victoriametrics.com/enterprise/) components: fix support for automatic issuing of TLS certificates for HTTPS server via [Let's Encrypt service](https://letsencrypt.org/) using [TLS-ALPN-01 challenge](https://letsencrypt.org/docs/challenge-types/#tls-alpn-01). See [Automatic issuing of TLS certificates](https://docs.victoriametrics.com/victoriametrics/#automatic-issuing-of-tls-certificates) for more info.
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): fix VMUI backend URL, while using multitenant API. See more in [#9703](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9703).
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): fix incorrect backend URL in vmui when using multitenant API. See more in [#9703](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9703).
|
||||
* BUGFIX: all components: properly expose metadata for summaries and histograms in VictoriaMetrics components with enabled `-metrics.exposeMetadata` cmd-line flag. See [metrics#98](https://github.com/VictoriaMetrics/metrics/issues/98) for details.
|
||||
* BUGFIX: all components: lower severity of the log message for unavailable [Pressure Stall Information (PSI)](https://docs.kernel.org/accounting/psi.html) metrics from `ERROR` to `INFO` level. See [#9161](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9161) for details.
|
||||
* BUGFIX: [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/): properly prepare restore mark contents when using a backup shortname (e.g. `vmbackupmanager restore create daily/2025-09-12`). Previously, restore would fail with `failed to restore backup: cannot initialize remote fs: missing scheme in path` error.
|
||||
@@ -48,9 +59,9 @@ Released at 2025-09-12
|
||||
|
||||
Released at 2025-09-03
|
||||
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly route requests for `prometheus/vmui/config.json` API. Follow-up after 7f15e9f64cb8dd2b2f0f1c10d178fd06ac7c636c.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix `workingsetcache` metrics. Previously, after cache rotation, metrics could be double-counted or inflated. See [9553](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9553) for details.
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): fix the issue where filtering on click does not work on the Explorer Cardinality page. See [#9674](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9674) for details.
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly route requests for `prometheus/vmui/config.json` API. The routing was broken in [v1.125.0](https://docs.victoriametrics.com/CHANGELOG.html#v11250).
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix `workingsetcache` metrics. Previously, after cache rotation, metrics could be double-counted or inflated. See [#9553](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9553) for details.
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): fix the issue where filtering on click does not work on the [Cardinality Explorer](https://docs.victoriametrics.com/victoriametrics/#cardinality-explorer) page. See [#9674](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9674) for details.
|
||||
|
||||
## [v1.125.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.125.0)
|
||||
|
||||
@@ -115,6 +126,21 @@ Released at 2025-08-01
|
||||
* BUGFIX: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): do not configure `-httpListenAddr.useProxyProtocol` for `-httpInternalListenAddr`. See this issue [#9515](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9515) for details.
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): always display the tenant selector if the list of tenants is not empty. See [#9396](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9396).
|
||||
|
||||
## [v1.122.4](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.4)
|
||||
|
||||
Released at 2025-09-12
|
||||
|
||||
**v1.122.x is a line of [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/). It contains important up-to-date bugfixes for [VictoriaMetrics enterprise](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
||||
All these fixes are also included in [the latest community release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
|
||||
The v1.122.x line will be supported for at least 12 months since [v1.122.0](https://docs.victoriametrics.com/victoriametrics/changelog/#v11220) release**
|
||||
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): properly apply rollup functions to metrics based on their name in vmui's [metrics explorer](https://docs.victoriametrics.com/victoriametrics/#metrics-explorer). See [#9655](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9655) for details. Thanks to @wbwren-eric for the fix.
|
||||
* BUGFIX: all VictoriaMetrics [enterprise](https://docs.victoriametrics.com/enterprise/) components: fix support for automatic issuing of TLS certificates for HTTPS server via [Let's Encrypt service](https://letsencrypt.org/) using [TLS-ALPN-01 challenge](https://letsencrypt.org/docs/challenge-types/#tls-alpn-01). See [Automatic issuing of TLS certificates](https://docs.victoriametrics.com/victoriametrics/#automatic-issuing-of-tls-certificates) for more info.
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): fix incorrect backend URL in vmui when using multitenant API. See more in [#9703](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9703).
|
||||
* BUGFIX: all components: properly expose metadata for summaries and histograms in VictoriaMetrics components with enabled `-metrics.exposeMetadata` cmd-line flag. See [metrics#98](https://github.com/VictoriaMetrics/metrics/issues/98) for details.
|
||||
* BUGFIX: all components: lower severity of the log message for unavailable [Pressure Stall Information (PSI)](https://docs.kernel.org/accounting/psi.html) metrics from `ERROR` to `INFO` level. See [#9161](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9161) for details.
|
||||
* BUGFIX: [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/): properly prepare restore mark contents when using a backup shortname (e.g. `vmbackupmanager restore create daily/2025-09-12`). Previously, restore would fail with `failed to restore backup: cannot initialize remote fs: missing scheme in path` error.
|
||||
|
||||
## [v1.122.3](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.3)
|
||||
|
||||
Released at 2025-08-29
|
||||
@@ -505,6 +531,20 @@ Released at 2025-02-10
|
||||
* BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and [vmselect](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix discrepancies when using `or` binary operator. See [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7759) and [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7640) issues for details.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly update number of unique series for [cardinality limiter](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cardinality-limiter) on ingestion. Previously, limit could undercount the real number of the ingested unique series.
|
||||
|
||||
## [v1.110.19](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.110.19)
|
||||
|
||||
Released at 2025-09-12
|
||||
|
||||
**v1.110.x is a line of [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/). It contains important up-to-date bugfixes for [VictoriaMetrics enterprise](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
||||
All these fixes are also included in [the latest community release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
|
||||
The v1.110.x line will be supported for at least 12 months since [v1.110.0](https://docs.victoriametrics.com/victoriametrics/changelog/#v11100) release**
|
||||
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): properly apply rollup functions to metrics based on their name in vmui's [metrics explorer](https://docs.victoriametrics.com/victoriametrics/#metrics-explorer). See [#9655](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9655) for details. Thanks to @wbwren-eric for the fix.
|
||||
* BUGFIX: all VictoriaMetrics [enterprise](https://docs.victoriametrics.com/enterprise/) components: fix support for automatic issuing of TLS certificates for HTTPS server via [Let's Encrypt service](https://letsencrypt.org/) using [TLS-ALPN-01 challenge](https://letsencrypt.org/docs/challenge-types/#tls-alpn-01). See [Automatic issuing of TLS certificates](https://docs.victoriametrics.com/victoriametrics/#automatic-issuing-of-tls-certificates) for more info.
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): fix incorrect backend URL in vmui when using multitenant API. See more in [#9703](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9703).
|
||||
* BUGFIX: all components: properly expose metadata for summaries and histograms in VictoriaMetrics components with enabled `-metrics.exposeMetadata` cmd-line flag. See [metrics#98](https://github.com/VictoriaMetrics/metrics/issues/98) for details.
|
||||
* BUGFIX: all components: lower severity of the log message for unavailable [Pressure Stall Information (PSI)](https://docs.kernel.org/accounting/psi.html) metrics from `ERROR` to `INFO` level. See [#9161](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9161) for details.
|
||||
|
||||
## [v1.110.18](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.110.18)
|
||||
|
||||
Released at 2025-09-03
|
||||
|
||||
@@ -819,7 +819,7 @@ Released at 2022-04-12
|
||||
* BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly propagate limits at `-search.max*` command-line flags from `vminsert` to `vmstorage`. The limits are `-search.maxUniqueTimeseries`, `-search.maxSeries`, `-search.maxFederateSeries`, `-search.maxExportSeries`, `-search.maxGraphiteSeries` and `-search.maxTSDBStatusSeries`. They weren't propagated to `vmstorage` because of the bug. These limits were introduced in [v1.76.0](https://docs.victoriametrics.com/victoriametrics/changelog/#v1760). See [this bug](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2450).
|
||||
* BUGFIX: fix goroutine leak and possible deadlock when importing invalid data via [native binary format](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-data-in-native-format). See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2423).
|
||||
* BUGFIX: [Graphite Render API](https://docs.victoriametrics.com/victoriametrics/integrations/graphite/#render-api): properly calculate [hitCount](https://graphite.readthedocs.io/en/latest/functions.html#graphite.render.functions.hitcount) function. Previously it could return empty results if there were no original samples in some parts of the selected time range.
|
||||
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/): allow overriding built-in function names inside [WITH templates](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/expand-with-exprs). For example, `WITH (sum(a,b) = a + b + 1) sum(x,y)` now expands into `x + y + 1`. Previously such a query would fail with `cannot use reserved name` error. See [this bugreport](https://github.com/VictoriaMetrics/metricsql/issues/5).
|
||||
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/): allow overriding built-in function names inside [WITH templates](https://play.victoriametrics.com/select/0/prometheus/graph/#/expand-with-exprs). For example, `WITH (sum(a,b) = a + b + 1) sum(x,y)` now expands into `x + y + 1`. Previously such a query would fail with `cannot use reserved name` error. See [this bugreport](https://github.com/VictoriaMetrics/metricsql/issues/5).
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): properly display values greater than 1000 on Y axis. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2409).
|
||||
|
||||
|
||||
|
||||
@@ -671,7 +671,7 @@ Released at 2024-03-01
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): add support for `client_id` option into [kuma_sd_configs](https://docs.victoriametrics.com/victoriametrics/sd_configs/#kuma_sd_configs) in the same way as Prometheus does. See [this pull request](https://github.com/prometheus/prometheus/pull/13278).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): add support for `enable_compression` option in [scrape_configs](https://docs.victoriametrics.com/victoriametrics/sd_configs/#scrape_configs) in order to be compatible with Prometheus scrape configs. See [this pull request](https://github.com/prometheus/prometheus/pull/13166) and [this feature request](https://github.com/prometheus/prometheus/issues/12319). Note that `vmagent` was always supporting [`disable_compression` option](https://docs.victoriametrics.com/victoriametrics/vmagent/#scrape_config-enhancements) before Prometheus added `enable_compression` option.
|
||||
* FEATURE: [vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/): support client-side TLS configuration for [InfluxDB](https://docs.victoriametrics.com/victoriametrics/vmctl/influxdb/), [Remote Read protocol](https://docs.victoriametrics.com/victoriametrics/vmctl/remoteread/) and [OpenTSDB](https://docs.victoriametrics.com/victoriametrics/vmctl/opentsdb/). See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5748). Thanks to @khushijain21 for pull requests [1](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5783), [2](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5798), [3](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5797).
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): preserve [`WITH` templates](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/expand-with-exprs) when clicking the `prettify query` button at the right side of query input field. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5383).
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): preserve [`WITH` templates](https://play.victoriametrics.com/select/0/prometheus/graph/#/expand-with-exprs) when clicking the `prettify query` button at the right side of query input field. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5383).
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): allow filling gaps on graphs with interpolated lines as Grafana does. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5152) and [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5862).
|
||||
* FEATURE: [vmalert](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmalert): support filtering by group, rule or labels in [vmalert's UI](https://docs.victoriametrics.com/victoriametrics/vmalert/#web) for `/groups` and `/alerts` pages. See [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5791) by @victoramsantos.
|
||||
* FEATURE: [docker-compose](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#docker-compose-environment-for-victoriametrics): create a separate [docker-compose environment](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#victoriaLogs-server) for VictoriaLogs installation, including fluentbit and [VictoriaLogs Grafana datasource](https://github.com/VictoriaMetrics/victorialogs-datasource). See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#victoriaLogs-server) for details.
|
||||
|
||||
@@ -117,7 +117,7 @@ It is allowed to run VictoriaMetrics and VictoriaLogs Enterprise components in [
|
||||
|
||||
Binary releases of Enterprise components are available at [the releases page for VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest)
|
||||
and [the releases page for VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/releases/latest).
|
||||
Enterprise binaries and packages have `enterprise` suffix in their names. For example, `victoria-metrics-linux-amd64-v1.125.1-enterprise.tar.gz`.
|
||||
Enterprise binaries and packages have `enterprise` suffix in their names. For example, `victoria-metrics-linux-amd64-v1.126.0-enterprise.tar.gz`.
|
||||
|
||||
In order to run binary release of Enterprise component, please download the `*-enterprise.tar.gz` archive for your OS and architecture
|
||||
from the corresponding releases page and unpack it. Then run the unpacked binary.
|
||||
@@ -135,8 +135,8 @@ For example, the following command runs VictoriaMetrics Enterprise binary with t
|
||||
obtained at [this page](https://victoriametrics.com/products/enterprise/trial/):
|
||||
|
||||
```sh
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.125.1/victoria-metrics-linux-amd64-v1.125.1-enterprise.tar.gz
|
||||
tar -xzf victoria-metrics-linux-amd64-v1.125.1-enterprise.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.126.0/victoria-metrics-linux-amd64-v1.126.0-enterprise.tar.gz
|
||||
tar -xzf victoria-metrics-linux-amd64-v1.126.0-enterprise.tar.gz
|
||||
./victoria-metrics-prod -license=BASE64_ENCODED_LICENSE_KEY
|
||||
```
|
||||
|
||||
@@ -151,7 +151,7 @@ Alternatively, VictoriaMetrics Enterprise license can be stored in the file and
|
||||
It is allowed to run VictoriaMetrics and VictoriaLogs Enterprise components in [cases listed here](#valid-cases-for-victoriametrics-enterprise).
|
||||
|
||||
Docker images for Enterprise components are available at [VictoriaMetrics Docker Hub](https://hub.docker.com/u/victoriametrics) and [VictoriaMetrics Quay](https://quay.io/organization/victoriametrics).
|
||||
Enterprise docker images have `enterprise` suffix in their names. For example, `victoriametrics/victoria-metrics:v1.125.1-enterprise`.
|
||||
Enterprise docker images have `enterprise` suffix in their names. For example, `victoriametrics/victoria-metrics:v1.126.0-enterprise`.
|
||||
|
||||
In order to run Docker image of VictoriaMetrics Enterprise component, it is required to provide the license key via the command-line
|
||||
flag as described in the [binary-releases](#binary-releases) section.
|
||||
@@ -161,13 +161,13 @@ Enterprise license key can be obtained at [this page](https://victoriametrics.co
|
||||
For example, the following command runs VictoriaMetrics Enterprise Docker image with the specified license key:
|
||||
|
||||
```sh
|
||||
docker run --name=victoria-metrics victoriametrics/victoria-metrics:v1.125.1-enterprise -license=BASE64_ENCODED_LICENSE_KEY
|
||||
docker run --name=victoria-metrics victoriametrics/victoria-metrics:v1.126.0-enterprise -license=BASE64_ENCODED_LICENSE_KEY
|
||||
```
|
||||
|
||||
Alternatively, the license code can be stored in the file and then referred via `-licenseFile` command-line flag:
|
||||
|
||||
```sh
|
||||
docker run --name=victoria-metrics -v /vm-license:/vm-license victoriametrics/victoria-metrics:v1.125.1-enterprise -licenseFile=/path/to/vm-license
|
||||
docker run --name=victoria-metrics -v /vm-license:/vm-license victoriametrics/victoria-metrics:v1.126.0-enterprise -licenseFile=/path/to/vm-license
|
||||
```
|
||||
|
||||
Example docker-compose configuration:
|
||||
@@ -177,7 +177,7 @@ version: "3.5"
|
||||
services:
|
||||
victoriametrics:
|
||||
container_name: victoriametrics
|
||||
image: victoriametrics/victoria-metrics:v1.125.1
|
||||
image: victoriametrics/victoria-metrics:v1.126.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -209,7 +209,7 @@ is used to provide the license key in plain-text:
|
||||
```yaml
|
||||
server:
|
||||
image:
|
||||
tag: v1.125.1-enterprise
|
||||
tag: v1.126.0-enterprise
|
||||
|
||||
license:
|
||||
key: {BASE64_ENCODED_LICENSE_KEY}
|
||||
@@ -220,7 +220,7 @@ In order to provide the license key via existing secret, the following values fi
|
||||
```yaml
|
||||
server:
|
||||
image:
|
||||
tag: v1.125.1-enterprise
|
||||
tag: v1.126.0-enterprise
|
||||
|
||||
license:
|
||||
secret:
|
||||
@@ -270,7 +270,7 @@ spec:
|
||||
license:
|
||||
key: {BASE64_ENCODED_LICENSE_KEY}
|
||||
image:
|
||||
tag: v1.125.1-enterprise
|
||||
tag: v1.126.0-enterprise
|
||||
```
|
||||
|
||||
In order to provide the license key via existing secret, the following custom resource is used:
|
||||
@@ -287,7 +287,7 @@ spec:
|
||||
name: vm-license
|
||||
key: license
|
||||
image:
|
||||
tag: v1.125.1-enterprise
|
||||
tag: v1.126.0-enterprise
|
||||
```
|
||||
|
||||
Example secret with license key:
|
||||
@@ -320,7 +320,7 @@ Builds are available for amd64 and arm64 architectures.
|
||||
|
||||
Example archive:
|
||||
|
||||
`victoria-metrics-linux-amd64-v1.125.1-enterprise.tar.gz`
|
||||
`victoria-metrics-linux-amd64-v1.126.0-enterprise.tar.gz`
|
||||
|
||||
Includes:
|
||||
|
||||
@@ -329,7 +329,7 @@ Includes:
|
||||
|
||||
Example Docker image:
|
||||
|
||||
`victoriametrics/victoria-metrics:v1.125.1-enterprise-fips` – uses the FIPS-compatible binary and based on `scratch` image.
|
||||
`victoriametrics/victoria-metrics:v1.126.0-enterprise-fips` – uses the FIPS-compatible binary and is based on the `scratch` image.
|
||||
|
||||
## Monitoring license expiration
|
||||
|
||||
|
||||
@@ -35,8 +35,8 @@ scrape_configs:
|
||||
After you created the `scrape.yaml` file, download and unpack [single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) to the same directory:
|
||||
|
||||
```sh
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.125.1/victoria-metrics-linux-amd64-v1.125.1.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.125.1.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.126.0/victoria-metrics-linux-amd64-v1.126.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.126.0.tar.gz
|
||||
```
|
||||
|
||||
Then start VictoriaMetrics and instruct it to scrape targets defined in `scrape.yaml` and save scraped metrics
|
||||
@@ -150,8 +150,8 @@ Then start [single-node VictoriaMetrics](https://docs.victoriametrics.com/victor
|
||||
|
||||
```sh
|
||||
# Download and unpack single-node VictoriaMetrics
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.125.1/victoria-metrics-linux-amd64-v1.125.1.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.125.1.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.126.0/victoria-metrics-linux-amd64-v1.126.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.126.0.tar.gz
|
||||
|
||||
# Run single-node VictoriaMetrics with the given scrape.yaml
|
||||
./victoria-metrics-prod -promscrape.config=scrape.yaml
|
||||
|
||||
@@ -13,8 +13,9 @@ The aggregation is applied to all the metrics received via any [supported data i
|
||||
and/or scraped from [Prometheus-compatible targets](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-scrape-prometheus-exporters-such-as-node-exporter)
|
||||
after applying all the configured [relabeling stages](https://docs.victoriametrics.com/victoriametrics/relabeling/).
|
||||
|
||||
**By default, stream aggregation ignores timestamps associated with the input [samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples).
|
||||
It expects that the ingested samples have timestamps close to the current time. See [how to ignore old samples](#ignoring-old-samples).**
|
||||
**By default, stream aggregation ignores timestamps associated with the input [samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples). It expects that the ingested samples have timestamps close to the current time. See [how to ignore old samples](#ignoring-old-samples).**
|
||||
|
||||
**If `-streamAggr.dedupInterval` is enabled, out-of-order samples (older than already received) within the configured interval are treated as duplicates and ignored. See [de-duplication](#deduplication).**
|
||||
|
||||
# Use cases
|
||||
|
||||
@@ -329,18 +330,7 @@ deduplication and stream aggregation for all the received data, scraped or pushe
|
||||
The processed data is then stored in local storage and **can't be forwarded further**.
|
||||
|
||||
[vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) supports relabeling, deduplication and stream aggregation for all
|
||||
the received data, scraped or pushed. Then, the collected data will be forwarded to specified `-remoteWrite.url` destinations.
|
||||
The data processing order is the following:
|
||||
|
||||
1. all the received data is relabeled according to the specified [`-remoteWrite.relabelConfig`](https://docs.victoriametrics.com/victoriametrics/relabeling/) (if it is set)
|
||||
1. all the received data is deduplicated according to specified [`-streamAggr.dedupInterval`](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication)
|
||||
(if it is set to duration bigger than 0)
|
||||
1. all the received data is aggregated according to specified [`-streamAggr.config`](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#configuration) (if it is set)
|
||||
1. the resulting data is then replicated to each `-remoteWrite.url`
|
||||
1. data sent to each `-remoteWrite.url` can be additionally relabeled according to the corresponding `-remoteWrite.urlRelabelConfig` (set individually per URL)
|
||||
1. data sent to each `-remoteWrite.url` can be additionally deduplicated according to the corresponding `-remoteWrite.streamAggr.dedupInterval` (set individually per URL)
|
||||
1. data sent to each `-remoteWrite.url` can be additionally aggregated according to the corresponding `-remoteWrite.streamAggr.config` (set individually per URL)
|
||||
It isn't recommended using `-streamAggr.config` and `-remoteWrite.streamAggr.config` simultaneously, unless you understand the complications.
|
||||
the received data, scraped or pushed. See the [processing order for vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/#life-of-a-sample).
|
||||
|
||||
Typical scenarios for data routing with `vmagent`:
|
||||
|
||||
@@ -379,6 +369,8 @@ It is possible to drop the given labels before applying the de-duplication. See
|
||||
|
||||
The online de-duplication uses the same logic as [`-dedup.minScrapeInterval` command-line flag](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication) at VictoriaMetrics.
|
||||
|
||||
De-duplication is applied before stream aggregation rules and can drop samples before they get matched for aggregation.
|
||||
|
||||
# Relabeling
|
||||
|
||||
It is possible to apply [arbitrary relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/) to input and output metrics
|
||||
@@ -432,7 +424,7 @@ In this case it may be a good idea to drop the aggregated data during the first
|
||||
just after the restart of `vmagent` or single-node VictoriaMetrics. This can be done via the following options:
|
||||
|
||||
- The `-streamAggr.ignoreFirstIntervals=N` command-line flag at `vmagent` and single-node VictoriaMetrics. This flag instructs skipping the first `N`
|
||||
[aggregation intervals](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#aggregation-config) just after the restart across all the [configured stream aggregation configs](#configuration).
|
||||
[aggregation intervals](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#aggregation-config) just after the restart across all the [configured stream aggregation configs](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/).
|
||||
|
||||
The `-remoteWrite.streamAggr.ignoreFirstIntervals` command-line flag can be specified individually per each `-remoteWrite.url` at [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/).
|
||||
|
||||
|
||||
@@ -26,10 +26,8 @@ Single-node VictoriaMetrics:
|
||||
curl -v http://localhost:8428/api/v1/admin/tsdb/delete_series -d 'match[]=vm_http_request_errors_total'
|
||||
```
|
||||
|
||||
|
||||
The expected output should return [HTTP Status 204](https://datatracker.ietf.org/doc/html/rfc7231#page-53) and will look like:
|
||||
|
||||
|
||||
```sh
|
||||
* Trying 127.0.0.1:8428...
|
||||
* Connected to 127.0.0.1 (127.0.0.1) port 8428 (#0)
|
||||
@@ -46,17 +44,14 @@ The expected output should return [HTTP Status 204](https://datatracker.ietf.org
|
||||
* Connection #0 to host 127.0.0.1 left intact
|
||||
```
|
||||
|
||||
|
||||
Cluster version of VictoriaMetrics:
|
||||
|
||||
```sh
|
||||
curl -v http://<vmselect>:8481/delete/0/prometheus/api/v1/admin/tsdb/delete_series -d 'match[]=vm_http_request_errors_total'
|
||||
```
|
||||
|
||||
|
||||
The expected output should return [HTTP Status 204](https://datatracker.ietf.org/doc/html/rfc7231#page-53) and will look like:
|
||||
|
||||
|
||||
```sh
|
||||
* Trying 127.0.0.1:8481...
|
||||
* Connected to 127.0.0.1 (127.0.0.1) port 8481 (#0)
|
||||
@@ -73,7 +68,6 @@ The expected output should return [HTTP Status 204](https://datatracker.ietf.org
|
||||
* Connection #0 to host 127.0.0.1 left intact
|
||||
```
|
||||
|
||||
|
||||
Additional information:
|
||||
|
||||
* [How to delete time series](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-delete-time-series)
|
||||
@@ -89,14 +83,12 @@ Single-node VictoriaMetrics:
|
||||
curl http://localhost:8428/api/v1/export -d 'match[]=vm_http_request_errors_total' > filename.json
|
||||
```
|
||||
|
||||
|
||||
Cluster version of VictoriaMetrics:
|
||||
|
||||
```sh
|
||||
curl http://<vmselect>:8481/select/0/prometheus/api/v1/export -d 'match[]=vm_http_request_errors_total' > filename.json
|
||||
```
|
||||
|
||||
|
||||
Additional information:
|
||||
|
||||
* [How to export time series](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-time-series)
|
||||
@@ -138,14 +130,12 @@ Single-node VictoriaMetrics:
|
||||
curl http://localhost:8428/api/v1/export/native -d 'match[]=vm_http_request_errors_total' > filename.bin
|
||||
```
|
||||
|
||||
|
||||
Cluster version of VictoriaMetrics:
|
||||
|
||||
```sh
|
||||
curl http://<vmselect>:8481/select/0/prometheus/api/v1/export/native -d 'match[]=vm_http_request_errors_total' > filename.bin
|
||||
```
|
||||
|
||||
|
||||
More information:
|
||||
|
||||
* [How to export time series](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-time-series)
|
||||
@@ -162,14 +152,12 @@ Single-node VictoriaMetrics:
|
||||
curl -H 'Content-Type: application/json' --data-binary "@filename.json" -X POST http://localhost:8428/api/v1/import
|
||||
```
|
||||
|
||||
|
||||
Cluster version of VictoriaMetrics:
|
||||
|
||||
```sh
|
||||
curl -H 'Content-Type: application/json' --data-binary "@filename.json" -X POST http://<vminsert>:8480/insert/0/prometheus/api/v1/import
|
||||
```
|
||||
|
||||
|
||||
More information:
|
||||
|
||||
* [How to import time series](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-time-series-data)
|
||||
@@ -198,7 +186,6 @@ A single CSV line can contain multiple metrics. For example, this command import
|
||||
curl -d "GOOG,1.23,4.56,NYSE" 'http://localhost:8428/api/v1/import/csv?format=2:metric:ask,3:metric:bid,1:label:ticker,4:label:market'
|
||||
```
|
||||
|
||||
|
||||
Additional information:
|
||||
|
||||
* [How to import time series](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-time-series-data)
|
||||
@@ -237,7 +224,6 @@ Single-node VictoriaMetrics:
|
||||
curl -d 'metric_name{foo="bar"} 123' -X POST http://localhost:8428/api/v1/import/prometheus
|
||||
```
|
||||
|
||||
|
||||
Cluster version of VictoriaMetrics:
|
||||
|
||||
```sh
|
||||
@@ -260,7 +246,6 @@ Single-node VictoriaMetrics:
|
||||
curl http://localhost:8428/prometheus/api/v1/labels
|
||||
```
|
||||
|
||||
|
||||
Cluster version of VictoriaMetrics:
|
||||
|
||||
```sh
|
||||
@@ -272,6 +257,7 @@ An arbitrary time range can be set via [`start` and `end` query args](https://do
|
||||
The specified `start..end` time range is rounded to UTC day granularity because of performance reasons.
|
||||
|
||||
Additional information:
|
||||
|
||||
* [Getting label names](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names)
|
||||
* [Prometheus querying API usage](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#prometheus-querying-api-usage)
|
||||
* [URL format for VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#url-format)
|
||||
@@ -297,6 +283,7 @@ An arbitrary time range can be set via `start` and `end` query args.
|
||||
The specified `start..end` time range is rounded to UTC day granularity because of performance reasons.
|
||||
|
||||
Additional information:
|
||||
|
||||
* [Querying label values](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values)
|
||||
* [Prometheus querying API usage](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#prometheus-querying-api-usage)
|
||||
* [URL format for VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#url-format)
|
||||
@@ -311,15 +298,14 @@ Single-node VictoriaMetrics:
|
||||
curl http://localhost:8428/prometheus/api/v1/query -d 'query=vm_http_request_errors_total'
|
||||
```
|
||||
|
||||
|
||||
Cluster version of VictoriaMetrics:
|
||||
|
||||
```sh
|
||||
curl http://<vmselect>:8481/select/0/prometheus/api/v1/query -d 'query=vm_http_request_errors_total'
|
||||
```
|
||||
|
||||
|
||||
Additional information:
|
||||
|
||||
* [Instant queries](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#instant-query)
|
||||
* [Prometheus querying API usage](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#prometheus-querying-api-usage)
|
||||
* [Query language](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#metricsql)
|
||||
@@ -335,15 +321,14 @@ Single-node VictoriaMetrics:
|
||||
curl http://localhost:8428/prometheus/api/v1/query_range -d 'query=sum(increase(vm_http_request_errors_total{job="foo"}[5m]))' -d 'start=-1d' -d 'step=1h'
|
||||
```
|
||||
|
||||
|
||||
Cluster version of VictoriaMetrics:
|
||||
|
||||
```sh
|
||||
curl http://<vmselect>:8481/select/0/prometheus/api/v1/query_range -d 'query=sum(increase(vm_http_request_errors_total{job="foo"}[5m]))' -d 'start=-1d' -d 'step=1h'
|
||||
```
|
||||
|
||||
|
||||
Additional information:
|
||||
|
||||
* [Range queries](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#range-query)
|
||||
* [Prometheus querying API usage](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#prometheus-querying-api-usage)
|
||||
* [Query language](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#metricsql)
|
||||
@@ -359,7 +344,6 @@ Single-node VictoriaMetrics:
|
||||
curl http://localhost:8428/prometheus/api/v1/series -d 'match[]=vm_http_request_errors_total'
|
||||
```
|
||||
|
||||
|
||||
Cluster version of VictoriaMetrics:
|
||||
|
||||
```sh
|
||||
@@ -371,6 +355,7 @@ An arbitrary time range can be set via `start` and `end` query args.
|
||||
The specified `start..end` time range is rounded to UTC day granularity because of performance reasons.
|
||||
|
||||
Additional information:
|
||||
|
||||
* [Finding series by label matchers](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers)
|
||||
* [Prometheus querying API usage](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#prometheus-querying-api-usage)
|
||||
* [URL format for VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#url-format)
|
||||
@@ -386,15 +371,14 @@ Single-node VictoriaMetrics:
|
||||
curl http://localhost:8428/prometheus/api/v1/status/tsdb
|
||||
```
|
||||
|
||||
|
||||
Cluster version of VictoriaMetrics:
|
||||
|
||||
```sh
|
||||
curl http://<vmselect>:8481/select/0/prometheus/api/v1/status/tsdb
|
||||
```
|
||||
|
||||
|
||||
Additional information:
|
||||
|
||||
* [TSDB Stats](https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats)
|
||||
* [Prometheus querying API usage](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#prometheus-querying-api-usage)
|
||||
* [URL format for VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#url-format)
|
||||
@@ -403,20 +387,16 @@ Additional information:
|
||||
|
||||
**DataDog URL for Single-node VictoriaMetrics**
|
||||
|
||||
|
||||
```
|
||||
http://victoriametrics:8428/datadog
|
||||
```
|
||||
|
||||
|
||||
**DataDog URL for Cluster version of VictoriaMetrics**
|
||||
|
||||
|
||||
```
|
||||
http://vminsert:8480/insert/0/datadog
|
||||
```
|
||||
|
||||
|
||||
### /datadog/api/v1/series
|
||||
|
||||
**Imports data in DataDog v1 format into VictoriaMetrics**
|
||||
@@ -445,7 +425,6 @@ echo '
|
||||
' | curl -X POST -H 'Content-Type: application/json' --data-binary @- http://localhost:8428/datadog/api/v1/series
|
||||
```
|
||||
|
||||
|
||||
Cluster version of VictoriaMetrics:
|
||||
|
||||
```sh
|
||||
@@ -470,13 +449,11 @@ echo '
|
||||
' | curl -X POST -H 'Content-Type: application/json' --data-binary @- 'http://<vminsert>:8480/insert/0/datadog/api/v1/series'
|
||||
```
|
||||
|
||||
|
||||
Additional information:
|
||||
|
||||
* [How to send data from DataDog agent](https://docs.victoriametrics.com/victoriametrics/integrations/datadog/)
|
||||
* [URL format for VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#url-format)
|
||||
|
||||
|
||||
### /datadog/api/v2/series
|
||||
|
||||
**Imports data in [DataDog v2](https://docs.datadoghq.com/api/latest/metrics/#submit-metrics) format into VictoriaMetrics**
|
||||
@@ -509,7 +486,6 @@ echo '
|
||||
' | curl -X POST -H 'Content-Type: application/json' --data-binary @- http://localhost:8428/datadog/api/v2/series
|
||||
```
|
||||
|
||||
|
||||
Cluster version of VictoriaMetrics:
|
||||
|
||||
```sh
|
||||
@@ -538,7 +514,6 @@ echo '
|
||||
' | curl -X POST -H 'Content-Type: application/json' --data-binary @- 'http://<vminsert>:8480/insert/0/datadog/api/v2/series'
|
||||
```
|
||||
|
||||
|
||||
Additional information:
|
||||
|
||||
* [How to send data from DataDog agent](https://docs.victoriametrics.com/victoriametrics/integrations/datadog/)
|
||||
@@ -554,14 +529,12 @@ Single-node VictoriaMetrics:
|
||||
curl http://localhost:8428/federate -d 'match[]=vm_http_request_errors_total'
|
||||
```
|
||||
|
||||
|
||||
Cluster version of VictoriaMetrics:
|
||||
|
||||
```sh
|
||||
curl http://<vmselect>:8481/select/0/prometheus/federate -d 'match[]=vm_http_request_errors_total'
|
||||
```
|
||||
|
||||
|
||||
Additional information:
|
||||
|
||||
* [Federation](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#federation)
|
||||
@@ -578,14 +551,12 @@ Single-node VictoriaMetrics:
|
||||
curl http://localhost:8428/graphite/metrics/find -d 'query=vm_http_request_errors_total'
|
||||
```
|
||||
|
||||
|
||||
Cluster version of VictoriaMetrics:
|
||||
|
||||
```sh
|
||||
curl http://<vmselect>:8481/select/0/graphite/metrics/find -d 'query=vm_http_request_errors_total'
|
||||
```
|
||||
|
||||
|
||||
Additional information:
|
||||
|
||||
* [Metrics find API in Graphite](https://graphite-api.readthedocs.io/en/latest/api.html#metrics-find)
|
||||
@@ -603,14 +574,12 @@ Single-node VictoriaMetrics:
|
||||
curl -d 'measurement,tag1=value1,tag2=value2 field1=123,field2=1.23' -X POST http://localhost:8428/write
|
||||
```
|
||||
|
||||
|
||||
Cluster version of VictoriaMetrics:
|
||||
|
||||
```sh
|
||||
curl -d 'measurement,tag1=value1,tag2=value2 field1=123,field2=1.23' -X POST http://<vminsert>:8480/insert/0/influx/write
|
||||
```
|
||||
|
||||
|
||||
Additional information:
|
||||
|
||||
* [How to send Influx data to VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/integrations/influxdb/)
|
||||
@@ -635,8 +604,6 @@ curl -Is http://<vmselect>:8481/internal/resetRollupResultCache
|
||||
vmselect will propagate this call to the rest of the vmselects listed in its `-selectNode` cmd-line flag. If this
|
||||
flag isn't set, then the cache needs to be purged from each vmselect individually.
|
||||
|
||||
|
||||
|
||||
### TCP and UDP
|
||||
|
||||
#### How to send data from OpenTSDB-compatible agents to VictoriaMetrics
|
||||
@@ -650,14 +617,12 @@ Single-node VictoriaMetrics:
|
||||
echo "put foo.bar.baz `date +%s` 123 tag1=value1 tag2=value2" | nc -N localhost 4242
|
||||
```
|
||||
|
||||
|
||||
Cluster version of VictoriaMetrics:
|
||||
|
||||
```sh
|
||||
echo "put foo.bar.baz `date +%s` 123 tag1=value1 tag2=value2" | nc -N <vminsert> 4242
|
||||
```
|
||||
|
||||
|
||||
Enable HTTP server for OpenTSDB /api/put requests by setting `-opentsdbHTTPListenAddr` command-line flag.
|
||||
|
||||
Single-node VictoriaMetrics:
|
||||
@@ -666,14 +631,12 @@ Single-node VictoriaMetrics:
|
||||
curl -H 'Content-Type: application/json' -d '[{"metric":"foo","value":45.34},{"metric":"bar","value":43}]' http://localhost:4242/api/put
|
||||
```
|
||||
|
||||
|
||||
Cluster version of VictoriaMetrics:
|
||||
|
||||
```sh
|
||||
curl -H 'Content-Type: application/json' -d '[{"metric":"foo","value":45.34},{"metric":"bar","value":43}]' http://<vminsert>:4242/insert/42/opentsdb/api/put
|
||||
```
|
||||
|
||||
|
||||
Additional information:
|
||||
|
||||
* [OpenTSDB http put API](http://opentsdb.net/docs/build/html/api_http/put.html)
|
||||
@@ -689,14 +652,12 @@ Single-node VictoriaMetrics:
|
||||
echo "foo.bar.baz;tag1=value1;tag2=value2 123 `date +%s`" | nc -N localhost 2003
|
||||
```
|
||||
|
||||
|
||||
Cluster version of VictoriaMetrics:
|
||||
|
||||
```sh
|
||||
echo "foo.bar.baz;tag1=value1;tag2=value2 123 `date +%s`" | nc -N <vminsert> 2003
|
||||
```
|
||||
|
||||
|
||||
Additional information:
|
||||
|
||||
* [How to send Graphite data to VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/integrations/graphite/#ingesting)
|
||||
|
||||
@@ -6,7 +6,7 @@ build:
|
||||
sitemap:
|
||||
disable: true
|
||||
---
|
||||
<!-- The file has to be manually updated during feature work in PR, make docs-update-flags command could be used peridically to ensure the flags in sync. -->
|
||||
<!-- The file has to be manually updated during feature work in PR, make docs-update-flags command could be used periodically to ensure the flags in sync. -->
|
||||
```shellhelp
|
||||
|
||||
victoria-metrics is a time series database and monitoring solution.
|
||||
|
||||
@@ -13,7 +13,7 @@ aliases:
|
||||
- /vmagent/
|
||||
---
|
||||
|
||||
`vmagent` is a tiny agent which helps you collect metrics from various sources,
|
||||
`vmagent` is a tiny agent that helps you collect metrics from various sources,
|
||||
[relabel and filter the collected metrics](https://docs.victoriametrics.com/victoriametrics/relabeling/)
|
||||
and store them in [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
|
||||
or any other storage systems via Prometheus `remote_write` protocol
|
||||
@@ -47,7 +47,7 @@ additionally to [discovering Prometheus-compatible targets and scraping metrics
|
||||
to the remote storage is repaired. The maximum disk usage for the buffer can be limited with `-remoteWrite.maxDiskUsagePerURL`.
|
||||
* Uses much lower amounts of RAM, CPU, disk IO and network bandwidth than Prometheus. The RAM usage and CPU usage can be reduced further
|
||||
if needed according to [these docs](#performance-optimizations).
|
||||
* Scrape targets can be spread among multiple `vmagent` instances when big number of targets must be scraped. See [these docs](#scraping-big-number-of-targets).
|
||||
* Scrape targets can be spread among multiple `vmagent` instances when a large number of targets must be scraped. See [these docs](#scraping-big-number-of-targets).
|
||||
* Can load scrape configs from multiple files. See [these docs](#loading-scrape-configs-from-multiple-files).
|
||||
* Can efficiently scrape targets that expose millions of time series such as [/federate endpoint in Prometheus](https://prometheus.io/docs/prometheus/latest/federation/).
|
||||
See [these docs](#stream-parsing-mode).
|
||||
@@ -131,7 +131,7 @@ See [these docs](https://docs.victoriametrics.com/victoriametrics/stream-aggrega
|
||||
|
||||
`vmagent` can accept metrics in [various popular data ingestion protocols](#how-to-push-data-to-vmagent), apply [relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/)
|
||||
to the accepted metrics (for example, change metric names/labels or drop unneeded metrics) and then forward the relabeled metrics
|
||||
to other remote storage systems, which support Prometheus `remote_write` protocol (including other `vmagent` instances).
|
||||
to other remote storage systems that support Prometheus `remote_write` protocol (including other `vmagent` instances).
|
||||
|
||||
### Replication and high availability
|
||||
|
||||
@@ -176,7 +176,7 @@ For example, if all the [raw samples](https://docs.victoriametrics.com/victoriam
|
||||
except for `instance` and `pod` labels must be routed to the same backend. In this case the list of ignored labels must be passed to
|
||||
`-remoteWrite.shardByURL.ignoreLabels` command-line flag: `-remoteWrite.shardByURL.ignoreLabels=instance,pod`.
|
||||
|
||||
See also [how to scrape big number of targets](#scraping-big-number-of-targets).
|
||||
See also [how to scrape a large number of targets](#scraping-big-number-of-targets).
|
||||
|
||||
### Relabeling and filtering
|
||||
|
||||
@@ -187,31 +187,38 @@ Please see [Relabeling cookbook](https://docs.victoriametrics.com/victoriametric
|
||||
### Splitting data streams among multiple systems
|
||||
|
||||
`vmagent` supports splitting the collected data between multiple destinations with the help of `-remoteWrite.urlRelabelConfig`,
|
||||
which is applied independently for each configured `-remoteWrite.url` destination. For example, it is possible to replicate or split
|
||||
that is applied independently for each configured `-remoteWrite.url` destination. For example, it is possible to replicate or split
|
||||
data among long-term remote storage, short-term remote storage and a real-time analytical system [built on top of Kafka](https://github.com/Telefonica/prometheus-kafka-adapter).
|
||||
Note that each destination can receive its own subset of the collected data due to per-destination relabeling via `-remoteWrite.urlRelabelConfig`.
|
||||
|
||||
For example, let's assume all the scraped or received metrics by `vmagent` have label `env` with values `dev` or `prod`.
|
||||
To route metrics `env=dev` to destination `dev` and metrics with `env=prod` to destination `prod` apply the following config:
|
||||
|
||||
1. Create relabeling config file `relabelDev.yml` to drop all metrics that don't have label `env=dev`:
|
||||
|
||||
```yaml
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "dev"
|
||||
```
|
||||
|
||||
2. Create relabeling config file `relabelProd.yml` to drop all metrics that don't have label `env=prod`:
|
||||
|
||||
```yaml
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "prod"
|
||||
```
|
||||
|
||||
3. Configure `vmagent` with 2 `-remoteWrite.url` flags pointing to destinations `dev` and `prod` with corresponding
|
||||
`-remoteWrite.urlRelabelConfig` configs:
|
||||
|
||||
```sh
|
||||
./vmagent \
|
||||
-remoteWrite.url=http://<dev-url> -remoteWrite.urlRelabelConfig=relabelDev.yml \
|
||||
-remoteWrite.url=http://<prod-url> -remoteWrite.urlRelabelConfig=relabelProd.yml
|
||||
-remoteWrite.url=http://<prod-url> -remoteWrite.urlRelabelConfig=relabelProd.yml
|
||||
```
|
||||
|
||||
With this configuration `vmagent` will forward to `http://<dev-url>` only metrics that have `env=dev` label.
|
||||
And to `http://<prod-url>` it will forward only metrics that have `env=prod` label.
|
||||
|
||||
@@ -238,17 +245,54 @@ There is also support for multitenant writes. See [these docs](#multitenancy).
|
||||
[Deduplication at stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication) allows setting up arbitrary complex de-duplication schemes
|
||||
for the collected samples. Examples:
|
||||
|
||||
- The following command instructs `vmagent` to send only the last sample per each seen [time series](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#time-series) per every 60 seconds:
|
||||
* The following command instructs `vmagent` to send only the last sample per each seen [time series](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#time-series) per every 60 seconds:
|
||||
|
||||
```sh
|
||||
./vmagent -remoteWrite.url=http://remote-storage/api/v1/write -streamAggr.dedupInterval=60s
|
||||
```
|
||||
|
||||
- The following command instructs `vmagent` to merge [time series](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#time-series) with different `replica` label values
|
||||
* The following command instructs `vmagent` to merge [time series](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#time-series) with different `replica` label values
|
||||
and then to send only the last sample per each merged series per every 60 seconds:
|
||||
|
||||
```sh
|
||||
./vmagent -remoteWrite=http://remote-storage/api/v1/write -streamAggr.dropInputLabels=replica -streamAggr.dedupInterval=60s
|
||||
./vmagent -remoteWrite.url=http://remote-storage/api/v1/write -streamAggr.dropInputLabels=replica -streamAggr.dedupInterval=60s
|
||||
```
|
||||
|
||||
### Life of a sample
|
||||
|
||||
vmagent supports limiting, relabeling, deduplication and stream aggregation for all the received metric samples, scraped or pushed.
|
||||
The received data is then forwarded to the specified `-remoteWrite.url` destinations. The processing pipeline is the following:
|
||||
|
||||
```mermaid
|
||||
%%{init: { "themeCSS": ".nodeLabel, .edgeLabel { white-space: nowrap; word-break: normal; overflow-wrap: normal; }" }}%%
|
||||
flowchart TB
|
||||
A[Pushed or Scraped samples] --> B[Ingestion rate limiting<br><b>-maxIngestionRate</b>]
|
||||
B --> C[Global <a href="https://docs.victoriametrics.com/victoriametrics/relabeling/">relabeling</a><br><b>-remoteWrite.relabelConfig</b>]
|
||||
C --> D[complexity limiting<br><b>-maxLabelsPerTimeseries</b><br><b>-maxLabelNameLen</b><br><b>-maxLabelValueLen</b>]
|
||||
D --> E[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter">cardinality limiting</a><br><b>-remoteWrite.maxHourlySeries</b><br><b>-remoteWrite.maxDailySeries</b>]
|
||||
E --> F[Global <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-streamAggr.config</b><br><b>-streamAggr.dedupInterval</b>]
|
||||
F --> G[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#replication-and-high-availability">replicate</a> to each <b>-remoteWrite.url</b><br/>or <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages">shard</a> if <b>-remoteWrite.shardByURL</b> is set]
|
||||
|
||||
%% Left branch
|
||||
G --> H1[per-url <a href="https://docs.victoriametrics.com/victoriametrics/relabeling/">relabeling</a><br><b>-remoteWrite.urlRelabelConfig</b>]
|
||||
H1 --> H2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
|
||||
H2 --> H3[per-url extra labels<br><b>-remoteWrite.label</b>]
|
||||
H3 --> H4["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
|
||||
H4 --> H5[[push to <b>-remoteWrite.url</b>]]
|
||||
|
||||
%% Right branch
|
||||
G --> R1[per-url <a href="https://docs.victoriametrics.com/victoriametrics/relabeling/">relabeling</a><br><b>-remoteWrite.urlRelabelConfig</b>]
|
||||
R1 --> R2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
|
||||
R2 --> R3[per-url extra labels<br><b>-remoteWrite.label</b>]
|
||||
R3 --> R4["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
|
||||
R4 --> R5[[push to <b>-remoteWrite.url</b>]]
|
||||
```
|
||||
|
||||
Scraping has additional settings that can be applied before samples are pushed to the processing pipeline above:
|
||||
1. [Service Discovery relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/#service-discovery-relabeling)
|
||||
2. [Scraping relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/#scraping-relabeling)
|
||||
3. `sample_limit`, `series_limit`, `label_limit` in [scrape_configs](https://docs.victoriametrics.com/victoriametrics/sd_configs/#scrape_configs).
|
||||
|
||||
## How to push data to vmagent
|
||||
|
||||
`vmagent` supports [the same set of push-based data ingestion protocols as VictoriaMetrics does](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-time-series-data)
|
||||
@@ -277,13 +321,12 @@ sections from [Prometheus config file](https://prometheus.io/docs/prometheus/lat
|
||||
All other sections are ignored, including the [remote_write](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write) section.
|
||||
Use `-remoteWrite.*` command-line flag instead for configuring remote write settings. See [the list of unsupported config sections](#unsupported-prometheus-config-sections).
|
||||
|
||||
The file pointed by `-promscrape.config` may contain `%{ENV_VAR}` placeholders which are substituted by the corresponding `ENV_VAR` environment variable values.
|
||||
The file pointed to by `-promscrape.config` may contain `%{ENV_VAR}` placeholders that are substituted by the corresponding `ENV_VAR` environment variable values.
|
||||
|
||||
See also:
|
||||
|
||||
- [scrape config examples](https://docs.victoriametrics.com/victoriametrics/scrape_config_examples/)
|
||||
- [the list of supported service discovery types for Prometheus scrape targets](https://docs.victoriametrics.com/victoriametrics/sd_configs/).
|
||||
|
||||
* [scrape config examples](https://docs.victoriametrics.com/victoriametrics/scrape_config_examples/)
|
||||
* [the list of supported service discovery types for Prometheus scrape targets](https://docs.victoriametrics.com/victoriametrics/sd_configs/).
|
||||
|
||||
### scrape_config enhancements
|
||||
|
||||
@@ -305,7 +348,7 @@ scrape_configs:
|
||||
* `disable_keepalive: true` for disabling [HTTP keep-alive connections](https://en.wikipedia.org/wiki/HTTP_persistent_connection)
|
||||
on a per-job basis. By default, `vmagent` uses keep-alive connections to scrape targets for reducing overhead on connection re-establishing.
|
||||
* `series_limit: N` for limiting the number of unique time series a single scrape target can expose. See [these docs](#cardinality-limiter).
|
||||
* `stream_parse: true` for scraping targets in a streaming manner. This may be useful when targets export big number of metrics. See [these docs](#stream-parsing-mode).
|
||||
* `stream_parse: true` for scraping targets in a streaming manner. This may be useful when targets export a large number of metrics. See [these docs](#stream-parsing-mode).
|
||||
* `scrape_align_interval: duration` for aligning scrapes to the given interval instead of using random offset
|
||||
in the range `[0 ... scrape_interval]` for scraping each target. The random offset helps to spread scrapes evenly in time.
|
||||
* `scrape_offset: duration` for specifying the exact offset for scraping instead of using random offset in the range `[0 ... scrape_interval]`.
|
||||
@@ -353,10 +396,10 @@ There is no need in specifying top-level `scrape_configs` section in these files
|
||||
See [Prometheus querying API docs for VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#prometheus-querying-api-usage).
|
||||
* `rule_files` and `alerting`. These sections are supported by [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/).
|
||||
|
||||
The list of supported service discovery types is available [here](#how-to-collect-metrics-in-prometheus-format).
|
||||
The list of supported service discovery types is available in [how-to-collect-metrics-in-prometheus-format](#how-to-collect-metrics-in-prometheus-format).
|
||||
|
||||
Additionally, `vmagent` doesn't support `refresh_interval` option at service discovery sections.
|
||||
This option is substituted with `-promscrape.*CheckInterval` command-line flags, which are specific per each service discovery type.
|
||||
This option is substituted with `-promscrape.*CheckInterval` command-line flags that are specific per each service discovery type.
|
||||
See [the full list of command-line flags for vmagent](#advanced-usage).
|
||||
|
||||
## Configuration update
|
||||
@@ -374,7 +417,7 @@ and `-remoteWrite.streamAggr.config`:
|
||||
|
||||
* Sending HTTP request to `http://vmagent:8429/-/reload` endpoint. This endpoint can be protected with `-reloadAuthKey` command-line flag.
|
||||
|
||||
There is also `-promscrape.configCheckInterval` command-line flag, which can be used for automatic reloading configs from updated `-promscrape.config` file.
|
||||
There is also `-promscrape.configCheckInterval` command-line flag that can be used for automatic reloading configs from updated `-promscrape.config` file.
|
||||
|
||||
## SRV urls
|
||||
|
||||
@@ -383,14 +426,14 @@ record into TCP address with hostname and TCP port, and then uses the resulting
|
||||
|
||||
SRV urls are supported in the following places:
|
||||
|
||||
- In `-remoteWrite.url` command-line flag. For example, if `victoria-metrics` [DNS SRV](https://en.wikipedia.org/wiki/SRV_record) record contains
|
||||
* In `-remoteWrite.url` command-line flag. For example, if `victoria-metrics` [DNS SRV](https://en.wikipedia.org/wiki/SRV_record) record contains
|
||||
`victoria-metrics-host:8428` TCP address, then `-remoteWrite.url=http://srv+victoria-metrics/api/v1/write` is automatically resolved into
|
||||
`-remoteWrite.url=http://victoria-metrics-host:8428/api/v1/write`. If the DNS SRV record is resolved into multiple TCP addresses, then `vmagent`
|
||||
uses randomly chosen address per each connection it establishes to the remote storage.
|
||||
|
||||
- In scrape target addresses aka `__address__` label - see [these docs](https://docs.victoriametrics.com/victoriametrics/relabeling/#how-to-modify-scrape-urls-in-targets) for details.
|
||||
* In scrape target addresses aka `__address__` label - see [these docs](https://docs.victoriametrics.com/victoriametrics/relabeling/#how-to-modify-scrape-urls-in-targets) for details.
|
||||
|
||||
- In urls used for [service discovery](https://docs.victoriametrics.com/victoriametrics/sd_configs/).
|
||||
* In urls used for [service discovery](https://docs.victoriametrics.com/victoriametrics/sd_configs/).
|
||||
|
||||
SRV urls are useful when HTTP services run on different TCP ports or when they can change TCP ports over time (for instance, after the restart).
|
||||
|
||||
@@ -401,10 +444,10 @@ or via VictoriaMetrics remote write protocol.
|
||||
|
||||
VictoriaMetrics remote write protocol provides the following benefits compared to Prometheus remote write protocol:
|
||||
|
||||
- Reduced network bandwidth usage by 2x-5x. This allows saving network bandwidth usage costs when `vmagent` and
|
||||
* Reduced network bandwidth usage by 2x-5x. This allows saving network bandwidth usage costs when `vmagent` and
|
||||
the configured remote storage systems are located in different datacenters, availability zones or regions.
|
||||
|
||||
- Reduced disk read/write IO and disk space usage at `vmagent` when the remote storage is temporarily unavailable.
|
||||
* Reduced disk read/write IO and disk space usage at `vmagent` when the remote storage is temporarily unavailable.
|
||||
In this case `vmagent` buffers the incoming data to disk using the VictoriaMetrics remote write format.
|
||||
This reduces disk read/write IO and disk space usage by 2x-5x compared to Prometheus remote write format.
|
||||
|
||||
@@ -536,7 +579,7 @@ and attaches `instance`, `job` and other target-specific labels to these metrics
|
||||
* `scrape_labels_limit` - the configured limit on the number of [labels](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#labels) the given target can expose
|
||||
per [sample](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples).
|
||||
The limit can be set via `label_limit` option at [scrape_configs](https://docs.victoriametrics.com/victoriametrics/sd_configs/#scrape_configs).
|
||||
This metric is exposed only if the `label_limit` is set.
|
||||
This metric is exposed only if the `label_limit` is set.
|
||||
|
||||
* `scrape_series_added` - **an approximate** number of new [series](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#time-series) the given target generates during the current scrape.
|
||||
This metric allows detecting targets (identified by `instance` label),
|
||||
@@ -601,7 +644,7 @@ e.g. it sets `scrape_series_added` metric to zero. See [these docs](#automatical
|
||||
By default, `vmagent` ignores metric metadata exposed by scrape targets in [Prometheus exposition format](https://github.com/prometheus/docs/blob/main/docs/instrumenting/exposition_formats.md), received via [Prometheus remote write v1](https://prometheus.io/docs/specs/prw/remote_write_spec/) or [OpenTelemetry protocol](https://github.com/open-telemetry/opentelemetry-proto/blob/v1.7.0/opentelemetry/proto/metrics/v1/metrics.proto). Set `-enableMetadata=true` to enable metadata processing{{% available_from "v1.125.1" %}}.
|
||||
During processing, metadata won't be dropped or modified by [relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/) or [streaming aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/).
|
||||
|
||||
When `-enableMultitenantHandlers` is enabled, vmagent adds tenant info to metadata received via the [multitenant endpoints](https://docs.victoriametrics.com/victoriametrics/vmagent/#multitenancy) (/insert/<accountID>/<suffix>). However, if `vm_account_id` or `vm_project_id` labels are added directly to metrics before reaching vmagent, and vmagent writes to the [vminsert multitenant endpoints](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy-via-labels), the tenant info won't be attached and the metadata will be stored under the default tenant of VictoriaMetrics cluster.
|
||||
When `-enableMultitenantHandlers` is enabled, vmagent adds tenant info to metadata received via the [multitenant endpoints](https://docs.victoriametrics.com/victoriametrics/vmagent/#multitenancy) (`/insert/<accountID>/<suffix>`). However, if `vm_account_id` or `vm_project_id` labels are added directly to metrics before reaching vmagent, and vmagent writes to the [vminsert multitenant endpoints](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy-via-labels), the tenant info won't be attached and the metadata will be stored under the default tenant of VictoriaMetrics cluster.
|
||||
|
||||
>Enabling metadata requires extra memory, disk space, and network traffic.
|
||||
|
||||
@@ -610,7 +653,7 @@ When `-enableMultitenantHandlers` is enabled, vmagent adds tenant info to metada
|
||||
By default, `vmagent` parses the full response from the scrape target, applies [relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/)
|
||||
and then pushes the resulting metrics to the configured `-remoteWrite.url` in one go. This mode works well for the majority of cases
|
||||
when the scrape target exposes a small number of metrics (e.g. less than 10K). But this mode may take large amounts of memory
|
||||
when the scrape target exposes big number of metrics (for example, when `vmagent` scrapes [`kube-state-metrics`](https://github.com/kubernetes/kube-state-metrics)
|
||||
when the scrape target exposes a large number of metrics (for example, when `vmagent` scrapes [`kube-state-metrics`](https://github.com/kubernetes/kube-state-metrics)
|
||||
in a large Kubernetes cluster). It is recommended to enable stream parsing mode for such targets.
|
||||
When this mode is enabled, `vmagent` processes the response from the scrape target in chunks.
|
||||
This allows saving memory when scraping targets that expose millions of metrics.
|
||||
@@ -626,7 +669,7 @@ stream parsing mode can be explicitly enabled in the following places:
|
||||
* Via `__stream_parse__=true` label, which can be set via [relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/) at `relabel_configs` section.
|
||||
In this case stream parsing mode is enabled for the corresponding scrape targets.
|
||||
Typical use case: to set the label via [Kubernetes annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/)
|
||||
for targets exposing big number of metrics.
|
||||
for targets exposing a large number of metrics.
|
||||
|
||||
Examples:
|
||||
|
||||
@@ -704,17 +747,17 @@ See also [how to shard data among multiple remote storage systems](#sharding-amo
|
||||
|
||||
## High availability
|
||||
|
||||
It is possible to run multiple **identically configured** `vmagent` instances or `vmagent`
|
||||
[clusters](#scraping-big-number-of-targets), so they [scrape](#how-to-collect-metrics-in-prometheus-format)
|
||||
the same set of targets and push the collected data to the same set of VictoriaMetrics remote storage systems.
|
||||
It is possible to run multiple **identically configured** `vmagent` instances or `vmagent`
|
||||
[clusters](#scraping-big-number-of-targets), so they [scrape](#how-to-collect-metrics-in-prometheus-format)
|
||||
the same set of targets and push the collected data to the same set of VictoriaMetrics remote storage systems.
|
||||
Two **identically configured** vmagent instances or clusters are usually called an HA pair.
|
||||
|
||||
When running HA pairs, [deduplication](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication) must be configured
|
||||
When running HA pairs, [deduplication](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication) must be configured
|
||||
at VictoriaMetrics side in order to de-duplicate received samples.
|
||||
See [these docs](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication) for details.
|
||||
|
||||
It is also recommended passing different values to `-promscrape.cluster.name` command-line flag per each `vmagent`
|
||||
instance or per each `vmagent` cluster in HA setup. This is needed for proper data de-duplication.
|
||||
It is also recommended passing different values to `-promscrape.cluster.name` command-line flag per each `vmagent`
|
||||
instance or per each `vmagent` cluster in HA setup. This is needed for proper data de-duplication.
|
||||
See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2679) for details.
|
||||
|
||||
## Scraping targets via a proxy
|
||||
@@ -760,30 +803,35 @@ scrape_configs:
|
||||
|
||||
`vmagent` stores pending data that cannot be sent to the configured remote storage systems in a timely manner.
|
||||
By default, `vmagent` writes all the pending data to the folder configured via `-remoteWrite.tmpDataPath` cmd-line flag
|
||||
until this data is sent to the configured `-remoteWrite.url` systems or until the folder becomes full.
|
||||
The maximum data size that can be saved to `-remoteWrite.tmpDataPath` per every configured `-remoteWrite.url` can be
|
||||
limited via `-remoteWrite.maxDiskUsagePerURL` command-line flag. When this limit is reached, `vmagent` drops the oldest
|
||||
until this data is sent to the configured `-remoteWrite.url` systems or until the folder becomes full.
|
||||
The maximum data size that can be saved to `-remoteWrite.tmpDataPath` per every configured `-remoteWrite.url` can be
|
||||
limited via `-remoteWrite.maxDiskUsagePerURL` command-line flag. When this limit is reached, `vmagent` drops the oldest
|
||||
data from disk in order to save newly ingested data.
|
||||
|
||||
The folder structure of persistence data is as follows:
|
||||
|
||||
```
|
||||
<remoteWrite.tmpDataPath>
|
||||
└── persistent-queue
|
||||
└── 1_B9EB7BE220B91E9D
|
||||
```
|
||||
|
||||
Each remote write URL corresponds to a folder similar to `1_B9EB7BE220B91E9D`.
|
||||
|
||||
It's generated based on the following information:
|
||||
|
||||
1. The **sequence order** of the remote write URL cmd-line flags, starting from **1**.
|
||||
2. The **hash result** of the remote write URL itself, excluding query parameters and fragments.
|
||||
|
||||
For example, for the remote write configs:
|
||||
|
||||
```
|
||||
-remoteWrite.url=http://example-1:8428/prometheus/api/v1/write?foo=bar#baz
|
||||
-remoteWrite.url=http://user:pass@example-2:8428/prometheus/api/v1/write?qux=quux#quuz
|
||||
```
|
||||
|
||||
vmagent will generate the following persistent queue folders:
|
||||
|
||||
```bash
|
||||
# 1_<hash(http://example-1:8428/prometheus/api/v1/write)>, query parameters foo=bar and fragment baz are removed.
|
||||
1_BA6E4303DCFA0D45
|
||||
@@ -795,26 +843,26 @@ vmagent will generate the following persistent queue folders:
|
||||
|
||||
There are cases when it is better to disable on-disk persistence for pending data at `vmagent` side:
|
||||
|
||||
- When the persistent disk performance isn't enough for the given data processing rate.
|
||||
- When it is better to buffer pending data at the client side instead of buffering it at `vmagent` side in the `-remoteWrite.tmpDataPath` folder.
|
||||
- When the data is already buffered at [Kafka side](#reading-metrics-from-kafka) or at [Google PubSub side](https://docs.victoriametrics.com/victoriametrics/integrations/pubsub/#reading-metrics).
|
||||
- When it is better to drop pending data instead of buffering it.
|
||||
* When the persistent disk performance isn't enough for the given data processing rate.
|
||||
* When it is better to buffer pending data at the client side instead of buffering it at `vmagent` side in the `-remoteWrite.tmpDataPath` folder.
|
||||
* When the data is already buffered at [Kafka side](https://docs.victoriametrics.com/victoriametrics/integrations/kafka/#reading-metrics) or at [Google PubSub side](https://docs.victoriametrics.com/victoriametrics/integrations/pubsub/#reading-metrics).
|
||||
* When it is better to drop pending data instead of buffering it.
|
||||
|
||||
In this case `-remoteWrite.disableOnDiskQueue` command-line flag can be passed to `vmagent` per each configured `-remoteWrite.url`.
|
||||
`vmagent` works in the following way if the corresponding remote storage system at `-remoteWrite.url` cannot keep up with the data ingestion rate
|
||||
and the `-remoteWrite.disableOnDiskQueue` command-line flag is set:
|
||||
|
||||
- It returns `429 Too Many Requests` HTTP error to clients, which send data to `vmagent` via [supported HTTP endpoints](#how-to-push-data-to-vmagent).
|
||||
If `-remoteWrite.dropSamplesOnOverload` command-line flag is set or if multiple `-remoteWrite.url` command-line flags are set,
|
||||
* It returns `429 Too Many Requests` HTTP error to clients, which send data to `vmagent` via [supported HTTP endpoints](#how-to-push-data-to-vmagent).
|
||||
If `-remoteWrite.dropSamplesOnOverload` command-line flag is set or if multiple `-remoteWrite.url` command-line flags are set,
|
||||
then the ingested samples are silently dropped instead of returning the error to clients.
|
||||
- It suspends consuming data from [Kafka side](#reading-metrics-from-kafka) or [Google PubSub side](https://docs.victoriametrics.com/victoriametrics/integrations/pubsub/) until the remote storage becomes available.
|
||||
* It suspends consuming data from [Kafka side](https://docs.victoriametrics.com/victoriametrics/integrations/kafka/#reading-metrics) or [Google PubSub side](https://docs.victoriametrics.com/victoriametrics/integrations/pubsub/) until the remote storage becomes available.
|
||||
If `-remoteWrite.dropSamplesOnOverload` command-line flag is set or if multiple `-remoteWrite.disableOnDiskQueue` command-line flags are set
|
||||
for different `-remoteWrite.url` options, then the fetched samples are silently dropped instead of suspending data consumption from Kafka or Google PubSub.
|
||||
- It drops samples pushed to `vmagent` via non-HTTP protocols and logs the error. Pass `-remoteWrite.dropSamplesOnOverload` command-line flag in order
|
||||
* It drops samples pushed to `vmagent` via non-HTTP protocols and logs the error. Pass `-remoteWrite.dropSamplesOnOverload` command-line flag in order
|
||||
to suppress error messages in this case.
|
||||
- It drops samples [scraped from Prometheus-compatible targets](#how-to-collect-metrics-in-prometheus-format), because it is better from operations perspective
|
||||
* It drops samples [scraped from Prometheus-compatible targets](#how-to-collect-metrics-in-prometheus-format), because it is better from operations perspective
|
||||
to drop samples instead of blocking the scrape process.
|
||||
- It drops [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/) output samples, because it is better from operations perspective
|
||||
* It drops [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/) output samples, because it is better from operations perspective
|
||||
to drop output samples instead of blocking the stream aggregation process.
|
||||
|
||||
The number of dropped samples because of overloaded remote storage can be [monitored](#monitoring) via `vmagent_remotewrite_samples_dropped_total` metric.
|
||||
@@ -851,16 +899,16 @@ The limit can be enforced in the following places:
|
||||
Scraped metrics are dropped for time series exceeding the given limit on the time window of 24h.
|
||||
`vmagent` creates the following additional per-target metrics for targets with non-zero series limit:
|
||||
|
||||
- `scrape_series_limit_samples_dropped` - the number of dropped samples during the scrape when the unique series limit is exceeded.
|
||||
- `scrape_series_limit` - the series limit for the given target.
|
||||
- `scrape_series_current` - the current number of series for the given target.
|
||||
* `scrape_series_limit_samples_dropped` - the number of dropped samples during the scrape when the unique series limit is exceeded.
|
||||
* `scrape_series_limit` - the series limit for the given target.
|
||||
* `scrape_series_current` - the current number of series for the given target.
|
||||
|
||||
These metrics are automatically sent to the configured `-remoteWrite.url` alongside the scraped per-target metrics.
|
||||
|
||||
These metrics allow building the following alerting rules:
|
||||
|
||||
- `scrape_series_current / scrape_series_limit > 0.9` - alerts when the number of series exposed by the target reaches 90% of the limit.
|
||||
- `sum_over_time(scrape_series_limit_samples_dropped[1h]) > 0` - alerts when some samples are dropped because the series limit on a particular target is reached.
|
||||
* `scrape_series_current / scrape_series_limit > 0.9` - alerts when the number of series exposed by the target reaches 90% of the limit.
|
||||
* `sum_over_time(scrape_series_limit_samples_dropped[1h]) > 0` - alerts when some samples are dropped because the series limit on a particular target is reached.
|
||||
|
||||
See also `sample_limit` option at [scrape_config section](https://docs.victoriametrics.com/victoriametrics/sd_configs/#scrape_configs).
|
||||
|
||||
@@ -918,7 +966,7 @@ If you have suggestions for improvements or have found a bug - please open an is
|
||||
|
||||
* It is recommended [setting up the official Grafana dashboard](#monitoring) in order to monitor the state of `vmagent`.
|
||||
|
||||
* It is recommended increasing the maximum number of open files in the system (`ulimit -n`) when scraping a big number of targets,
|
||||
* It is recommended to increase the maximum number of open files in the system (`ulimit -n`) when scraping a large number of targets,
|
||||
as `vmagent` establishes at least a single TCP connection per target.
|
||||
|
||||
* If `vmagent` uses too much RAM or CPU, then follow [these recommendations](#performance-optimizations).
|
||||
@@ -932,7 +980,7 @@ If you have suggestions for improvements or have found a bug - please open an is
|
||||
By default, the `-promscrape.maxDroppedTargets` targets are shown here. If your setup drops more targets during relabeling,
|
||||
then increase `-promscrape.maxDroppedTargets` command-line flag value to see all the dropped targets.
|
||||
Note that tracking each dropped target requires up to 10Kb of RAM. Therefore, big values for `-promscrape.maxDroppedTargets`
|
||||
may result in increased memory usage if a big number of scrape targets are dropped during relabeling.
|
||||
may result in increased memory usage if a large number of scrape targets are dropped during relabeling.
|
||||
|
||||
* It is recommended increasing `-remoteWrite.queues` if `vmagent_remotewrite_pending_data_bytes` [metric](#monitoring)
|
||||
grows constantly. It is also recommended increasing `-remoteWrite.maxBlockSize` and `-remoteWrite.maxRowsPerBlock` command-line flags in this case.
|
||||
@@ -1002,8 +1050,8 @@ If you have suggestions for improvements or have found a bug - please open an is
|
||||
|
||||
See also:
|
||||
|
||||
- [Relabel Debugging](https://docs.victoriametrics.com/victoriametrics/relabeling/#relabel-debugging)
|
||||
- [General Troubleshooting](https://docs.victoriametrics.com/victoriametrics/troubleshooting/)
|
||||
* [Relabel Debugging](https://docs.victoriametrics.com/victoriametrics/relabeling/#relabel-debugging)
|
||||
* [General Troubleshooting](https://docs.victoriametrics.com/victoriametrics/troubleshooting/)
|
||||
|
||||
## Calculating disk space for persistence queue
|
||||
|
||||
@@ -1041,7 +1089,7 @@ Additional notes:
|
||||
|
||||
## Security
|
||||
|
||||
See general recommendations regarding security [here](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#security).
|
||||
See general recommendations regarding [security](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#security).
|
||||
|
||||
### mTLS protection
|
||||
|
||||
@@ -1050,7 +1098,7 @@ since it is expected it runs in an isolated trusted network.
|
||||
[Enterprise version of vmagent](https://docs.victoriametrics.com/victoriametrics/enterprise/) supports the ability to accept [mTLS](https://en.wikipedia.org/wiki/Mutual_authentication)
|
||||
requests at this port, by specifying `-tls` and `-mtls` command-line flags. For example, the following command runs `vmagent`, which accepts only mTLS requests at port `8429`:
|
||||
|
||||
```
|
||||
```sh
|
||||
./vmagent -tls -mtls -remoteWrite.url=...
|
||||
```
|
||||
|
||||
@@ -1062,38 +1110,38 @@ It is possible to specify custom TLS Root CA via `-mtlsCAFile` command-line flag
|
||||
`vmagent` is optimized for low CPU usage and low RAM usage without the need to tune any configs. Sometimes it is needed to optimize CPU / RAM usage of `vmagent` even more.
|
||||
For example, if `vmagent` needs to scrape thousands of targets in resource-constrained environments, then the following options may help reduce CPU usage and RAM usage of `vmagent`:
|
||||
|
||||
- Set [GOGC](https://pkg.go.dev/runtime#hdr-Environment_Variables) environment variable to `100`. This reduces CPU usage at the cost of higher RAM usage.
|
||||
* Set [GOGC](https://pkg.go.dev/runtime#hdr-Environment_Variables) environment variable to `100`. This reduces CPU usage at the cost of higher RAM usage.
|
||||
|
||||
- Set [GOMAXPROCS](https://pkg.go.dev/runtime#hdr-Environment_Variables) environment variable to the value slightly bigger than the number of CPU cores used by `vmagent`.
|
||||
* Set [GOMAXPROCS](https://pkg.go.dev/runtime#hdr-Environment_Variables) environment variable to the value slightly bigger than the number of CPU cores used by `vmagent`.
|
||||
Another option is to set CPU limit in Kubernetes / Docker to the integer value bigger than the number of CPU cores used by `vmagent`.
|
||||
This reduces RAM usage and CPU usage when `vmagent` runs in an environment with bigger number of available CPU cores. Note that it may be needed to increase the `-remoteWrite.queues`
|
||||
command-line flag to bigger values if `GOMAXPROCS` is set to too small values, since by default `-remoteWrite.queues` is proportional to `GOMAXPROCS`.
|
||||
|
||||
- Disable response compression at scrape targets via `-promscrape.disableCompression` command-line flag or via `disable_compression: true` option
|
||||
* Disable response compression at scrape targets via `-promscrape.disableCompression` command-line flag or via `disable_compression: true` option
|
||||
in the [scrape_config](https://docs.victoriametrics.com/victoriametrics/sd_configs/#scrape_configs). This reduces CPU usage at the cost of higher network bandwidth usage
|
||||
between `vmagent` and scrape targets.
|
||||
|
||||
- Disable tracking of original labels for the discovered targets via `-promscrape.dropOriginalLabels` command-line flag. This helps reducing RAM usage when `vmagent`
|
||||
discovers big number of scrape targets and the majority of these targets are [dropped](https://docs.victoriametrics.com/victoriametrics/relabeling/#how-to-drop-discovered-targets).
|
||||
* Disable tracking of original labels for the discovered targets via `-promscrape.dropOriginalLabels` command-line flag. This helps reduce RAM usage when `vmagent`
|
||||
discovers a large number of scrape targets and the majority of these targets are [dropped](https://docs.victoriametrics.com/victoriametrics/relabeling/#how-to-drop-discovered-targets).
|
||||
This is a typical case when `vmagent` discovers Kubernetes targets. The downside of using `-promscrape.dropOriginalLabels` command-line flag
|
||||
is the reduced [debuggability](#debugging-scrape-targets) for improperly configured per-target relabeling.
|
||||
is the reduced [debuggability](https://docs.victoriametrics.com/victoriametrics/relabeling/#relabel-debugging) for improperly configured per-target relabeling.
|
||||
|
||||
- Disable [staleness markers](https://docs.victoriametrics.com/victoriametrics/vmagent/#prometheus-staleness-markers) via `-promscrape.noStaleMarkers` command-line flag
|
||||
* Disable [staleness markers](https://docs.victoriametrics.com/victoriametrics/vmagent/#prometheus-staleness-markers) via `-promscrape.noStaleMarkers` command-line flag
|
||||
or via `no_stale_markers: true` option in the [scrape_config](https://docs.victoriametrics.com/victoriametrics/sd_configs/#scrape_configs). This reduces RAM usage and CPU usage.
|
||||
Note that disabling staleness markers may result in unexpected query results when scrape targets are frequently rotated (this is a typical case in Kubernetes).
|
||||
|
||||
- Set `-memory.allowedBytes` command-line flag to the value close to the actual memory usage of `vmagent`. Another option is to set memory limit in Kubernetes / Docker
|
||||
* Set `-memory.allowedBytes` command-line flag to the value close to the actual memory usage of `vmagent`. Another option is to set memory limit in Kubernetes / Docker
|
||||
to the value 50% bigger than the actual memory usage of `vmagent`. This should reduce memory usage spikes for `vmagent` running in the environment with bigger available memory
|
||||
when the remote storage cannot keep up with the data ingestion rate. Increasing `-remoteWrite.queues` command-line flag value may help in this case too.
|
||||
|
||||
- In extreme cases it may be useful to set `-promscrape.disableKeepAlive` command-line flag in order to save RAM on HTTP keep-alive connections to thousands of scrape targets.
|
||||
* In extreme cases it may be useful to set `-promscrape.disableKeepAlive` command-line flag in order to save RAM on HTTP keep-alive connections to thousands of scrape targets.
|
||||
|
||||
- Increase `scrape_interval` option in the `global` section of the `-promscrape.config` and/or at every [scrape_config](https://docs.victoriametrics.com/victoriametrics/sd_configs/#scrape_configs)
|
||||
* Increase `scrape_interval` option in the `global` section of the `-promscrape.config` and/or at every [scrape_config](https://docs.victoriametrics.com/victoriametrics/sd_configs/#scrape_configs)
|
||||
for reducing CPU usage. For example, increasing the `scrape_interval` from `10s` to `30s` across all the targets decreases CPU usage at `vmagent` by up to 3x.
|
||||
|
||||
Example command, which runs `vmagent` in an optimized mode:
|
||||
|
||||
```
|
||||
```sh
|
||||
GOGC=100 GOMAXPROCS=1 ./vmagent -promscrape.disableCompression -promscrape.dropOriginalLabels -promscrape.noStaleMarkers -memory.allowedBytes=1GiB -promscrape.disableKeepAlive ...
|
||||
```
|
||||
|
||||
@@ -1150,20 +1198,16 @@ ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://b
|
||||
|
||||
* Memory profile can be collected with the following command (replace `0.0.0.0` with hostname if needed):
|
||||
|
||||
|
||||
```sh
|
||||
curl http://0.0.0.0:8429/debug/pprof/heap > mem.pprof
|
||||
```
|
||||
|
||||
|
||||
* CPU profile can be collected with the following command (replace `0.0.0.0` with hostname if needed):
|
||||
|
||||
|
||||
```sh
|
||||
curl http://0.0.0.0:8429/debug/pprof/profile > cpu.pprof
|
||||
```
|
||||
|
||||
|
||||
The command for collecting CPU profile waits for 30 seconds before returning.
|
||||
|
||||
The collected profiles may be analyzed with [go tool pprof](https://github.com/google/pprof).
|
||||
@@ -1247,4 +1291,3 @@ Moved to [integrations/kafka/#estimating-message-size-and-rate](https://docs.vic
|
||||
###### Kafka broker authorization and authentication
|
||||
|
||||
Moved to [integrations/kafka/#kafka-broker-authorization-and-authentication](https://docs.victoriametrics.com/victoriametrics/integrations/kafka/#kafka-broker-authorization-and-authentication).
|
||||
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 25 KiB After Width: | Height: | Size: 28 KiB |
@@ -6,7 +6,7 @@ build:
|
||||
sitemap:
|
||||
disable: true
|
||||
---
|
||||
<!-- The file has to be manually updated during feature work in PR, make docs-update-flags command could be used peridically to ensure the flags in sync. -->
|
||||
<!-- This file has to be manually updated during feature work in a PR. The `make docs-update-flags` command can be used periodically to ensure the flags are in sync. -->
|
||||
```shellhelp
|
||||
|
||||
vmagent collects metrics data via popular data ingestion protocols and routes it to VictoriaMetrics.
|
||||
|
||||
@@ -18,6 +18,7 @@ VMAlert command-line tool
|
||||
|
||||
You can use `vmalert-tool` to run unit tests for alerting and recording rules.
|
||||
It will perform the following actions:
|
||||
|
||||
* sets up an isolated VictoriaMetrics instance;
|
||||
* simulates the periodic ingestion of time series;
|
||||
* queries the ingested data for recording and alerting rules evaluation like [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/);
|
||||
@@ -25,7 +26,7 @@ It will perform the following actions:
|
||||
|
||||
See how to run vmalert-tool for unit test below:
|
||||
|
||||
```
|
||||
```sh
|
||||
# Run vmalert-tool with one or multiple test files via `--files` cmd-line flag
|
||||
# Supports file paths with hierarchical patterns and regexps, as well as HTTP URLs.
|
||||
./vmalert-tool unittest --files /path/to/file --files http://<some-server-addr>/path/to/test.yaml
|
||||
@@ -283,10 +284,12 @@ groups:
|
||||
### Debug mode
|
||||
|
||||
vmalert-tool can print additional log messages for specific alerting rules, similar to [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/#debug-mode), by following these steps:
|
||||
|
||||
1. Set `debug: true` in the rule’s configuration;
|
||||
2. Run vmalert-tool with the flag `-loggerLevel=INFO`.
|
||||
|
||||
The additional log messages include tips for alert state transformations, timestamp and result of each evaluation:
|
||||
|
||||
```shell-session
|
||||
2024-12-10T12:10:26.339Z info VictoriaMetrics/app/vmalert/rule/alerting.go:212 DEBUG rule "TestGroup":"TestRule" (14686524233356632740) at 1970-01-01T00:00:00Z: query returned 0 samples (elapsed: 2.148792ms)
|
||||
2024-12-10T12:10:26.339Z info VictoriaMetrics/app/vmalert/datasource/client.go:254 DEBUG datasource request: executing POST request with params "http://127.0.0.1:8880/prometheus/api/v1/query?query=test_metric+%3E+0&step=300s&time=1970-01-01T00%3A01%3A00Z"
|
||||
|
||||
@@ -25,12 +25,13 @@ implementation and aims to be compatible with its syntax.
|
||||
Configure `-vmalert.proxyURL` on VictoriaMetrics [single-node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmalert)
|
||||
or [vmselect in cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#vmalert)
|
||||
to proxy requests to `vmalert`. Proxying is needed for the following cases:
|
||||
|
||||
* to proxy requests from [Grafana Alerting UI](https://grafana.com/docs/grafana/latest/alerting/);
|
||||
* to access `vmalert`'s UI through [vmui](https://docs.victoriametrics.com/victoriametrics/#vmui).
|
||||
|
||||
[VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_vmalert_intro)
|
||||
provides out-of-the-box alerting functionality based on `vmalert`. This service simplifies the setup
|
||||
and management of alerting and recording rules as well as the integration with Alertmanager. For more details,
|
||||
[VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_vmalert_intro)
|
||||
provides out-of-the-box alerting functionality based on `vmalert`. This service simplifies the setup
|
||||
and management of alerting and recording rules as well as the integration with Alertmanager. For more details,
|
||||
please refer to the [VictoriaMetrics Cloud documentation](https://docs.victoriametrics.com/victoriametrics-cloud/alertmanager-setup-for-deployment/).
|
||||
|
||||
## Features
|
||||
@@ -55,11 +56,12 @@ please refer to the [VictoriaMetrics Cloud documentation](https://docs.victoriam
|
||||
requests may fail;
|
||||
* `vmalert` executes rules within a group sequentially, but persistence of execution results to remote
|
||||
storage is asynchronous. Hence, users shouldn't rely on chaining of recording rules when the result of a previous
|
||||
recording rule is reused in the next one. See how to chain groups [here](https://docs.victoriametrics.com/victoriametrics/vmalert/#chaining-groups).
|
||||
recording rule is reused in the next one. See how to [chain groups](https://docs.victoriametrics.com/victoriametrics/vmalert/#chaining-groups).
|
||||
|
||||
## QuickStart
|
||||
|
||||
To start using `vmalert` you will need the following things:
|
||||
|
||||
* list of rules - PromQL/MetricsQL expressions to execute;
|
||||
* datasource address - reachable endpoint with [Prometheus HTTP API](https://prometheus.io/docs/prometheus/latest/querying/api/#http-api) support for running queries against;
|
||||
* notifier address [optional] - reachable [Alert Manager](https://github.com/prometheus/alertmanager) instance for processing,
|
||||
@@ -74,6 +76,7 @@ You can use the existing [docker-compose environment](https://github.com/Victori
|
||||
as an example. It already contains vmalert configured with a list of alerting rules and integrated with Alert Manager and VictoriaMetrics.
|
||||
|
||||
Alternatively, build `vmalert` from sources:
|
||||
|
||||
```sh
|
||||
git clone https://github.com/VictoriaMetrics/VictoriaMetrics
|
||||
cd VictoriaMetrics
|
||||
@@ -81,6 +84,7 @@ make vmalert
|
||||
```
|
||||
|
||||
Then run `vmalert`:
|
||||
|
||||
```sh
|
||||
./bin/vmalert -rule=alert.rules \ # Path to the file with rules configuration. Supports wildcard
|
||||
-datasource.url=http://localhost:8428 \ # Prometheus HTTP API compatible datasource
|
||||
@@ -121,8 +125,8 @@ groups:
|
||||
|
||||
> Explore how to integrate `vmalert` with [VictoriaMetrics Anomaly Detection](https://docs.victoriametrics.com/anomaly-detection/) in the following [guide](https://docs.victoriametrics.com/anomaly-detection/guides/guide-vmanomaly-vmalert/).
|
||||
|
||||
> For users of [VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_vmalert_config),
|
||||
> many of the configuration steps (including highly available setup of `vmalert` for cluster deployments) are handled automatically.
|
||||
> For users of [VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_vmalert_config),
|
||||
> many of the configuration steps (including highly available setup of `vmalert` for cluster deployments) are handled automatically.
|
||||
> Please, refer to the [VictoriaMetrics Cloud documentation](https://docs.victoriametrics.com/victoriametrics-cloud/alertmanager-setup-for-deployment/) for more details.
|
||||
|
||||
### Groups
|
||||
@@ -151,10 +155,10 @@ name: <string>
|
||||
# See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155 and https://docs.victoriametrics.com/victoriametrics/keyconcepts/#query-latency.
|
||||
[ eval_delay: <duration> ]
|
||||
|
||||
# Limit limits the number of alerts or recording results the rule within this group can produce.
|
||||
# On exceeding the limit, rule will be marked with an error and all its results will be discarded.
|
||||
# 0 is no limit.
|
||||
[ limit: <integer> | default 0]
|
||||
# Limit limits the number of alerts or recording results a single rule within this group can produce.
|
||||
# If exceeded, the rule will be marked with an error and all its results will be discarded.
|
||||
# 0 means no limit.
|
||||
[ limit: <integer> | default = -rule.resultsLimit flag]
|
||||
|
||||
# How many rules execute at once within a group. Increasing concurrency may speed
|
||||
# up group's evaluation duration (exposed via `vmalert_iteration_duration_seconds` metric).
|
||||
@@ -165,14 +169,14 @@ name: <string>
|
||||
[ type: <string> ]
|
||||
|
||||
# Optional
|
||||
# The evaluation timestamp will be aligned with group's interval,
|
||||
# The evaluation timestamp will be aligned with group's interval,
|
||||
# instead of using the actual timestamp that evaluation happens at.
|
||||
#
|
||||
# It is enabled by default to get more predictable results
|
||||
# It is enabled by default to get more predictable results
|
||||
# and to visually align with graphs plotted via Grafana or vmui.
|
||||
# When comparing with raw queries, remember to use `step` equal to evaluation interval.
|
||||
#
|
||||
# See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5049
|
||||
# See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5049
|
||||
# Available starting from v1.95
|
||||
[ eval_alignment: <bool> | default true]
|
||||
|
||||
@@ -198,7 +202,7 @@ headers:
|
||||
[ <string>, ...]
|
||||
|
||||
# Optional list of HTTP headers in form `header-name: value`
|
||||
# applied for all alert notifications sent to notifiers
|
||||
# applied for all alert notifications sent to notifiers
|
||||
# generated by rules of this group.
|
||||
# It has higher priority over headers defined in notifier config.
|
||||
# For example:
|
||||
@@ -310,47 +314,47 @@ The following variables are available in templating:
|
||||
| $externalLabels or .ExternalLabels | List of labels configured via `-external.label` command-line flag. | Issues with {{ $labels.instance }} (datacenter-{{ $externalLabels.dc }}) |
|
||||
| $externalURL or .ExternalURL | URL configured via `-external.url` command-line flag. Used for cases when vmalert is hidden behind proxy. | Visit {{ $externalURL }} for more details |
|
||||
|
||||
Additionally, `vmalert` provides some extra templating functions listed [here](#template-functions) and [reusable templates](#reusable-templates).
|
||||
Additionally, `vmalert` provides some extra templating functions listed in [template functions](#template-functions) and [reusable templates](#reusable-templates).
|
||||
|
||||
#### Template functions
|
||||
|
||||
`vmalert` provides the following template functions, which can be used during [templating](#templating):
|
||||
|
||||
- `args arg0 ... argN` - converts the input args into a map with `arg0`, ..., `argN` keys.
|
||||
- `externalURL` - returns the value of `-external.url` command-line flag.
|
||||
- `first` - returns the first result from the input query results returned by `query` function.
|
||||
- `htmlEscape` - escapes special chars in input string, so it can be safely embedded as a plaintext into HTML.
|
||||
- `humanize` - converts the input number into human-readable format by adding [metric prefixes](https://en.wikipedia.org/wiki/Metric_prefix).
|
||||
* `args arg0 ... argN` - converts the input args into a map with `arg0`, ..., `argN` keys.
|
||||
* `externalURL` - returns the value of `-external.url` command-line flag.
|
||||
* `first` - returns the first result from the input query results returned by `query` function.
|
||||
* `htmlEscape` - escapes special chars in input string, so it can be safely embedded as a plaintext into HTML.
|
||||
* `humanize` - converts the input number into human-readable format by adding [metric prefixes](https://en.wikipedia.org/wiki/Metric_prefix).
|
||||
For example, `100000` is converted into `100K`.
|
||||
- `humanize1024` - converts the input number into human-readable format with 1024 base.
|
||||
* `humanize1024` - converts the input number into human-readable format with 1024 base.
|
||||
For example, `1024` is converted into `1ki`.
|
||||
- `humanizeDuration` - converts the input number in seconds into human-readable duration.
|
||||
- `humanizePercentage` - converts the input number to percentage. For example, `0.123` is converted into `12.3%`.
|
||||
- `humanizeTimestamp` - converts the input unix timestamp into human-readable time.
|
||||
- `jsonEscape` - JSON-encodes the input string.
|
||||
- `label name` - returns the value of the label with the given `name` from the input query result.
|
||||
- `match regex` - matches the input string against the provided `regex`.
|
||||
- `parseDuration` - parses the input string into duration in seconds. For example, `1h` is parsed into `3600`.
|
||||
- `parseDurationTime` - parses the input string into [time.Duration](https://pkg.go.dev/time#Duration).
|
||||
- `pathEscape` - escapes the input string, so it can be safely put inside path part of URL.
|
||||
- `pathPrefix` - returns the path part of the `-external.url` command-line flag.
|
||||
- `query` - executes the [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) query against `-datasource.url` and returns the query result.
|
||||
* `humanizeDuration` - converts the input number in seconds into human-readable duration.
|
||||
* `humanizePercentage` - converts the input number to percentage. For example, `0.123` is converted into `12.3%`.
|
||||
* `humanizeTimestamp` - converts the input unix timestamp into human-readable time.
|
||||
* `jsonEscape` - JSON-encodes the input string.
|
||||
* `label name` - returns the value of the label with the given `name` from the input query result.
|
||||
* `match regex` - matches the input string against the provided `regex`.
|
||||
* `parseDuration` - parses the input string into duration in seconds. For example, `1h` is parsed into `3600`.
|
||||
* `parseDurationTime` - parses the input string into [time.Duration](https://pkg.go.dev/time#Duration).
|
||||
* `pathEscape` - escapes the input string, so it can be safely put inside path part of URL.
|
||||
* `pathPrefix` - returns the path part of the `-external.url` command-line flag.
|
||||
* `query` - executes the [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) query against `-datasource.url` and returns the query result.
|
||||
For example, `{{ query "sort_desc(process_resident_memory_bytes)" | first | value }}` executes the `sort_desc(process_resident_memory_bytes)`
|
||||
query at `-datasource.url` and returns the first result.
|
||||
- `queryEscape` - escapes the input string, so it can be safely put inside [query arg](https://en.wikipedia.org/wiki/Percent-encoding) part of URL.
|
||||
- `quotesEscape` - escapes the input string, so it can be safely embedded into JSON string.
|
||||
- `reReplaceAll regex repl` - replaces all the occurrences of the `regex` in input string with the `repl`.
|
||||
- `safeHtml` - marks the input string as safe to use in HTML context without the need to html-escape it.
|
||||
- `sortByLabel name` - sorts the input query results by the label with the given `name`.
|
||||
- `stripDomain` - leaves the first part of the domain. For example, `foo.bar.baz` is converted to `foo`.
|
||||
* `queryEscape` - escapes the input string, so it can be safely put inside [query arg](https://en.wikipedia.org/wiki/Percent-encoding) part of URL.
|
||||
* `quotesEscape` - escapes the input string, so it can be safely embedded into JSON string.
|
||||
* `reReplaceAll regex repl` - replaces all the occurrences of the `regex` in input string with the `repl`.
|
||||
* `safeHtml` - marks the input string as safe to use in HTML context without the need to html-escape it.
|
||||
* `sortByLabel name` - sorts the input query results by the label with the given `name`.
|
||||
* `stripDomain` - leaves the first part of the domain. For example, `foo.bar.baz` is converted to `foo`.
|
||||
The port part is left in the output string. E.g. `foo.bar:1234` is converted into `foo:1234`.
|
||||
- `stripPort` - strips `port` part from `host:port` input string.
|
||||
- `strvalue` - returns the metric name from the input query result.
|
||||
- `title` - converts the first letters of every input word to uppercase.
|
||||
- `toLower` - converts all the chars in the input string to lowercase.
|
||||
- `toTime` - converts the input unix timestamp to [time.Time](https://pkg.go.dev/time#Time).
|
||||
- `toUpper` - converts all the chars in the input string to uppercase.
|
||||
- `value` - returns the numeric value from the input query result.
|
||||
* `stripPort` - strips `port` part from `host:port` input string.
|
||||
* `strvalue` - returns the metric name from the input query result.
|
||||
* `title` - converts the first letters of every input word to uppercase.
|
||||
* `toLower` - converts all the chars in the input string to lowercase.
|
||||
* `toTime` - converts the input unix timestamp to [time.Time](https://pkg.go.dev/time#Time).
|
||||
* `toUpper` - converts all the chars in the input string to uppercase.
|
||||
* `value` - returns the numeric value from the input query result.
|
||||
|
||||
#### Reusable templates
|
||||
|
||||
@@ -390,7 +394,6 @@ groups:
|
||||
The `-rule.templates` flag supports wildcards so multiple files with templates can be loaded.
|
||||
The content of `-rule.templates` can be also [hot reloaded](#hot-config-reload).
|
||||
|
||||
|
||||
#### Recording rules
|
||||
|
||||
The syntax for recording rules is the following:
|
||||
@@ -425,8 +428,8 @@ For recording rules to work `-remoteWrite.url` must be specified.
|
||||
|
||||
### Alerts state on restarts
|
||||
|
||||
`vmalert` holds alerts state in the memory. Restart of the `vmalert` process will reset the state of all active alerts
|
||||
in the memory. To prevent `vmalert` from losing the state on restarts configure it to persist the state
|
||||
`vmalert` holds alerts state in the memory. Restart of the `vmalert` process will reset the state of all active alerts
|
||||
in the memory. To prevent `vmalert` from losing the state on restarts configure it to persist the state
|
||||
to the remote database via the following flags:
|
||||
|
||||
* `-remoteWrite.url` - URL to VictoriaMetrics (Single) or vminsert (Cluster). `vmalert` will persist alerts state
|
||||
@@ -436,7 +439,7 @@ to the remote database via the following flags:
|
||||
The state will be persisted to the configured address on each evaluation.
|
||||
* `-remoteRead.url` - URL to VictoriaMetrics (Single) or vmselect (Cluster). `vmalert` will try to restore alerts state
|
||||
from the configured address by querying time series with name `ALERTS_FOR_STATE`. The restore happens only once when
|
||||
`vmalert` process starts, and only for the configured rules. Config [hot reload](#hot-config-reload) doesn't trigger
|
||||
`vmalert` process starts, and only for the configured rules. Config [hot reload](#hot-config-reload) doesn't trigger
|
||||
state restore.
|
||||
|
||||
Both flags are required for proper state restoration. Restore process may fail if time series are missing
|
||||
@@ -452,9 +455,10 @@ vmalert [web UI](https://docs.victoriametrics.com/victoriametrics/vmalert/#web)
|
||||
|
||||
It is possible to override the link format. For example, to make the link to [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui)
|
||||
specify the following cmd-line flags:
|
||||
|
||||
```sh
|
||||
./bin/vmalert \
|
||||
-external.url=http://<vmui-addr> \ # the hostname and port for datasource vmui
|
||||
-external.url=http://<vmui-addr> \ # the hostname and port for datasource vmui
|
||||
-external.alert.source='vmui/#/?g0.expr={{.Expr|queryEscape}}' # the path built using alert expr
|
||||
```
|
||||
|
||||
@@ -463,15 +467,16 @@ expression.
|
||||
|
||||
The `-external.alert.source` cmd-line flag supports [templating](https://docs.victoriametrics.com/victoriametrics/vmalert/#templating)
|
||||
and allows using labels and extra data related to the alert. For example, see the following link to Grafana:
|
||||
|
||||
```sh
|
||||
./bin/vmalert \
|
||||
-external.url=http://<grafana-addr> \ # the hostname and port for Grafana
|
||||
-external.url=http://<grafana-addr> \ # the hostname and port for Grafana
|
||||
-external.alert.source='explore?left={"datasource":"{{ if eq .Type \"vlogs\" }}VictoriaLogs{{ else }}VictoriaMetrics{{ end }}","queries":[{"expr":{{ .Expr|jsonEscape|queryEscape }},"refId":"A"}],"range":{"from":"{{ .ActiveAt.UnixMilli }}","to":"now"}}'
|
||||
```
|
||||
|
||||
In this example, `-external.alert.source` will lead to Grafana's Explore page with `expr` field equal to alert expression,
|
||||
and time range will be selected starting from `"from":"{{ .ActiveAt.UnixMilli }}"` when alert became active.
|
||||
The `datasource` name is set to `VictoriaLogs` if rule's type {{% available_from "v1.117.0" %}} (`prometheus`, `vlogs` or `graphite`) is `vlogs`.
|
||||
The `datasource` name is set to `VictoriaLogs` if rule's type {{% available_from "v1.117.0" %}} (`prometheus`, `vlogs` or `graphite`) is `vlogs`.
|
||||
Otherwise, it is set to `VictoriaMetrics`. See [how we set alert source in docker](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cdaf83247caec32cce26e2cab226310ed0b46ee4/deployment/docker/compose-vl-single.yml#L106-L107).
|
||||
|
||||
In addition to `source` link, some extra links could be added to alert's [annotations](https://docs.victoriametrics.com/victoriametrics/vmalert/#alerting-rules)
|
||||
@@ -496,6 +501,7 @@ There are the following approaches exist for alerting and recording rules across
|
||||
the `-remoteWrite.url` and vmselect as the `-datasource.url`, add `extra_label` with tenant ID as an HTTP URL parameter for each group.
|
||||
For example, run vmalert using `-datasource.url=http://vmselect:8481/select/multitenant/prometheus -remoteWrite.url=http://vminsert:8480/insert/multitenant/prometheus`,
|
||||
along with the rule group:
|
||||
|
||||
```yaml
|
||||
groups:
|
||||
- name: rules_for_tenant_456:789
|
||||
@@ -546,19 +552,19 @@ tags at [Docker Hub](https://hub.docker.com/r/victoriametrics/vmalert/tags) and
|
||||
[Enterprise version](https://docs.victoriametrics.com/victoriametrics/enterprise/) of `vmalert` may read alerting and recording rules
|
||||
from object storage:
|
||||
|
||||
- `./bin/vmalert -rule=s3://bucket/dir/alert.rules` would read rules from the given path at S3 bucket
|
||||
- `./bin/vmalert -rule=gs://bucket/dir/alert.rules` would read rules from the given path at GCS bucket
|
||||
* `./bin/vmalert -rule=s3://bucket/dir/alert.rules` would read rules from the given path at S3 bucket
|
||||
* `./bin/vmalert -rule=gs://bucket/dir/alert.rules` would read rules from the given path at GCS bucket
|
||||
|
||||
S3 and GCS paths support only matching by prefix, e.g. `s3://bucket/dir/rule_` matches
|
||||
all files with prefix `rule_` in the folder `dir`.
|
||||
|
||||
The following [command-line flags](#flags) can be used for fine-tuning access to S3 and GCS:
|
||||
|
||||
- `-s3.credsFilePath` - path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set.
|
||||
- `-s3.configFilePath` - path to file with S3 configs. Configs are loaded from default location if not set.
|
||||
- `-s3.configProfile` - profile name for S3 configs. If not set, the value of the environment variable will be loaded (`AWS_PROFILE` or `AWS_DEFAULT_PROFILE`).
|
||||
- `-s3.customEndpoint` - custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set.
|
||||
- `-s3.forcePathStyle` - prefixing endpoint with bucket name when set false, true by default.
|
||||
* `-s3.credsFilePath` - path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set.
|
||||
* `-s3.configFilePath` - path to file with S3 configs. Configs are loaded from default location if not set.
|
||||
* `-s3.configProfile` - profile name for S3 configs. If not set, the value of the environment variable will be loaded (`AWS_PROFILE` or `AWS_DEFAULT_PROFILE`).
|
||||
* `-s3.customEndpoint` - custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set.
|
||||
* `-s3.forcePathStyle` - prefixing endpoint with bucket name when set false, true by default.
|
||||
|
||||
### Topology examples
|
||||
|
||||
@@ -578,7 +584,7 @@ rules execution, storing recording rules results and alerts state.
|
||||
|
||||
`vmalert` configuration flags:
|
||||
|
||||
```
|
||||
```sh
|
||||
./bin/vmalert -rule=rules.yml \ # Path to the file with rules configuration. Supports wildcard
|
||||
-datasource.url=http://victoriametrics:8428 \ # VM-single addr for executing rules expressions
|
||||
-remoteWrite.url=http://victoriametrics:8428 \ # VM-single addr to persist alerts state and recording rules results
|
||||
@@ -599,7 +605,7 @@ Cluster mode could have multiple `vminsert` and `vmselect` components.
|
||||
|
||||
`vmalert` configuration flags:
|
||||
|
||||
```
|
||||
```sh
|
||||
./bin/vmalert -rule=rules.yml \ # Path to the file with rules configuration. Supports wildcard
|
||||
-datasource.url=http://vmselect:8481/select/0/prometheus # vmselect addr for executing rules expressions
|
||||
-remoteWrite.url=http://vminsert:8480/insert/0/prometheus # vminsert addr to persist alerts state and recording rules results
|
||||
@@ -610,8 +616,7 @@ Cluster mode could have multiple `vminsert` and `vmselect` components.
|
||||

|
||||
|
||||
If you want to spread the load across these components, add load balancers in front of them and configure
|
||||
`vmalert` with balancer addresses. Please, see more about VM's cluster architecture
|
||||
[here](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#architecture-overview).
|
||||
`vmalert` with balancer addresses. Please, see more about [VictoriaMetrics cluster architecture](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#architecture-overview).
|
||||
|
||||
#### HA vmalert
|
||||
|
||||
@@ -622,7 +627,7 @@ Alertmanagers.
|
||||
|
||||
`vmalert` configuration flags:
|
||||
|
||||
```
|
||||
```sh
|
||||
./bin/vmalert -rule=rules.yml \ # Path to the file with rules configuration. Supports wildcard
|
||||
-datasource.url=http://victoriametrics:8428 \ # VM-single addr for executing rules expressions
|
||||
-remoteWrite.url=http://victoriametrics:8428 \ # VM-single addr to persist alerts state and recording rules results
|
||||
@@ -633,10 +638,9 @@ Alertmanagers.
|
||||
|
||||

|
||||
|
||||
|
||||
To avoid recording rules results and alerts state duplication in VictoriaMetrics server
|
||||
don't forget to configure [deduplication](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication).
|
||||
Multiple equally configured vmalerts should evaluate rules at the same timestamps, so it is recommended
|
||||
Multiple equally configured vmalerts should evaluate rules at the same timestamps, so it is recommended
|
||||
to set `-dedup.minScrapeInterval` as equal to vmalert's `-evaluationInterval`.
|
||||
|
||||
If you have multiple different `interval` params for distinct rule groups, then set `-dedup.minScrapeInterval` to
|
||||
@@ -645,7 +649,7 @@ two groups with `interval: 10s` and `interval: 15s`, then set `-dedup.minScrapeI
|
||||
keep only a single data point on 30s time interval for all rules. However, try to avoid having inconsistent `interval`
|
||||
values.
|
||||
|
||||
It is not recommended having `-dedup.minScrapeInterval` smaller than `-evaluationInterval`, as it may produce
|
||||
It is not recommended to set `-dedup.minScrapeInterval` smaller than `-evaluationInterval`, as it may produce
|
||||
results with inconsistent intervals between data points.
|
||||
|
||||
Alertmanager will automatically deduplicate alerts with identical labels, so ensure that
|
||||
@@ -702,7 +706,7 @@ or reducing resolution) and push results to "cold" cluster.
|
||||
|
||||
`vmalert` configuration flags:
|
||||
|
||||
```
|
||||
```sh
|
||||
./bin/vmalert -rule=downsampling-rules.yml \ # Path to the file with rules configuration. Supports wildcard
|
||||
-datasource.url=http://raw-cluster-vmselect:8481/select/0/prometheus # vmselect addr for executing recording rules expressions
|
||||
-remoteWrite.url=http://aggregated-cluster-vminsert:8480/insert/0/prometheus # vminsert addr to persist recording rules results
|
||||
@@ -730,7 +734,6 @@ Using `vmagent` as a proxy provides additional benefits such as
|
||||
[data persisting when storage is unreachable](https://docs.victoriametrics.com/victoriametrics/vmagent/#replication-and-high-availability),
|
||||
or time series modification via [relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/).
|
||||
|
||||
|
||||
### Web
|
||||
|
||||
`vmalert` runs a web-server (`-httpListenAddr`) for serving metrics and alerts endpoints:
|
||||
@@ -756,7 +759,6 @@ This may be used for better integration with Grafana unified alerting system. Se
|
||||
* [How to query vmalert from single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmalert)
|
||||
* [How to query vmalert from VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#vmalert)
|
||||
|
||||
|
||||
## Graphite
|
||||
|
||||
vmalert sends requests to `<-datasource.url>/render?format=json` during evaluation of alerting and recording rules
|
||||
@@ -787,7 +789,7 @@ See a blogpost about [Rules backfilling via vmalert](https://victoriametrics.com
|
||||
In `replay` mode vmalert works as a cli-tool and exits immediately after work is done.
|
||||
To run vmalert in `replay` mode:
|
||||
|
||||
```
|
||||
```sh
|
||||
./bin/vmalert -rule=path/to/your.rules \ # path to files with rules you usually use with vmalert
|
||||
-datasource.url=http://localhost:8428 \ # Prometheus HTTP API compatible datasource
|
||||
-remoteWrite.url=http://localhost:8428 \ # remote write compatible storage to persist results
|
||||
@@ -823,7 +825,7 @@ max range per request: 8h20m0s
|
||||
2021-06-07T09:59:12.098Z info app/vmalert/replay.go:68 replay finished! Imported 511734 samples
|
||||
```
|
||||
|
||||
> In replay mode, groups are executed sequentially in the defined order. Within each group, rules are also executed sequentially,
|
||||
> In replay mode, groups are executed sequentially in the defined order. Within each group, rules are also executed sequentially,
|
||||
regardless of the `concurrency` setting. This ensures that any potential chaining between rules is preserved (see `-replay.rulesDelay`).
|
||||
If you want rules to run concurrently based on the `concurrency` setting, set `-replay.rulesDelay=0`.
|
||||
|
||||
@@ -882,7 +884,7 @@ See full description for these flags in `./vmalert -help`.
|
||||
## Unit Testing for Rules
|
||||
|
||||
You can use `vmalert-tool` to test your alerting and recording rules like [promtool does](https://prometheus.io/docs/prometheus/latest/configuration/unit_testing_rules/).
|
||||
See more details [here](https://docs.victoriametrics.com/victoriametrics/vmalert-tool/#unit-testing-for-rules).
|
||||
See more details in [vmalert-tool](https://docs.victoriametrics.com/victoriametrics/vmalert-tool/#unit-testing-for-rules).
|
||||
|
||||
## Monitoring
|
||||
|
||||
@@ -905,7 +907,8 @@ a review to the dashboard.
|
||||
### Common mistakes
|
||||
|
||||
Try the following tips to avoid common issues:
|
||||
1. Always set [group's interval](https://docs.victoriametrics.com/victoriametrics/vmalert/#groups) to be **equal to or greater than**
|
||||
|
||||
1. Always set [group's interval](https://docs.victoriametrics.com/victoriametrics/vmalert/#groups) to be **equal to or greater than**
|
||||
the [time series resolution](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#time-series-resolution).
|
||||
1. Don't set labels with dynamic values to `labels` [param](https://docs.victoriametrics.com/victoriametrics/vmalert/#alerting-rules).
|
||||
* 👉 Example: setting `label: {{$value}}` to the rule will break its [alert state tracking](https://docs.victoriametrics.com/victoriametrics/vmalert/#alert-state)
|
||||
@@ -930,15 +933,15 @@ Try the following tips to avoid common issues:
|
||||
1. Increase `[lookbehind-window]` to help tolerate data delays.
|
||||
* 👉 Example: `max_over_time(node_memory_MemAvailable_bytes[10m]) > 0` will still work even if no data was present in the last 9 minutes.
|
||||
1. Don't skip step in [subqueries](https://docs.victoriametrics.com/victoriametrics/metricsql/#subqueries).
|
||||
* 👉 Example: `sum(count_over_time((metric == 0)[1h:]))` is missing a step after `1h:`.
|
||||
* 👉 Example: `sum(count_over_time((metric == 0)[1h:]))` is missing a step after `1h:`.
|
||||
In that case, the default step will be used (`-datasource.queryStep`) and may cause unexpected results compared to
|
||||
executing this query in vmui/Grafana, where step is adjusted differently.
|
||||
|
||||
|
||||
### Rule state
|
||||
|
||||
vmalert keeps the last `-rule.updateEntriesLimit` updates (or `update_entries_limit` [per-rule config](https://docs.victoriametrics.com/victoriametrics/vmalert/#alerting-rules))
|
||||
for each rule. You can see these updates in vmalert's [web UI](#web):
|
||||
|
||||
1. Open the `Groups` tab
|
||||
2. Find the Group and rule you're interested in
|
||||
3. Click the `Details` link next to rule's name and look at the `Last N updates` section:
|
||||
@@ -948,6 +951,7 @@ for each rule. You can see these updates in vmalert's [web UI](#web):
|
||||
The rows in this section show the rule's evaluations in order, along with their results.
|
||||
|
||||
Every state has the following attributes:
|
||||
|
||||
1. `Updated at` - the actual time when vmalert ran this rule.
|
||||
1. `Executed at` - the `time` param that was sent to the datasource with evaluation request.
|
||||
1. `Series returned` - the number of series returned in this evaluation:
|
||||
@@ -956,18 +960,19 @@ Every state has the following attributes:
|
||||
1. `Series fetched` - the number of series scanned during execution. See [never-firing alerts](#never-firing-alerts).
|
||||
1. `Duration` - how long it took to evaluate the rule.
|
||||
* If this time is close to or longer than the evaluation interval, some evaluations might be skipped.
|
||||
* See how to handle [slow queries](https://docs.victoriametrics.com/victoriametrics/troubleshooting/#slow-queries).
|
||||
1. `cURL` - a sample HTTP request that vmalert sent to `-datasource.url` during evaluation.
|
||||
* See how to handle [slow queries](https://docs.victoriametrics.com/victoriametrics/troubleshooting/#slow-queries).
|
||||
1. `cURL` - a sample HTTP request that vmalert sent to `-datasource.url` during evaluation.
|
||||
* It includes all headers and query parameters.
|
||||
* You can use this command to debug and see what the data source returned at that moment of time.
|
||||
* _Sensitive data is removed from the `curl` example – see the [security](#security) section for more info._
|
||||
|
||||
> If a specific entry shows **Series returned: 0**, but the **cURL command returns some data** when you execute it,
|
||||
it likely means there was no data in the data source at the exact time the rule was evaluated.
|
||||
it likely means there was no data in the data source at the exact time the rule was evaluated.
|
||||
See more about [data delay](#data-delay).
|
||||
|
||||
vmalert exposes `vmalert_recording_rules_last_evaluation_samples` for recording rules to represent the amount of series
|
||||
returned during evaluations. The following alerting rule can be used to detect those recording rules that produce no data:
|
||||
|
||||
```yaml
|
||||
- alert: RecordingRulesNoData
|
||||
expr: vmalert_recording_rules_last_evaluation_samples < 1
|
||||
@@ -980,7 +985,8 @@ _See more about alerting rules in [Monitoring](#monitoring)._
|
||||
### Alert state
|
||||
|
||||
Sometimes, it's hard to understand why a specific alert fired or not. Keep in mind the following:
|
||||
* Alerts with `for: 0` (or not set) fire **immediately** after the evaluation.
|
||||
|
||||
* Alerts with `for: 0` (or not set) fire **immediately** after the evaluation.
|
||||
* Alerts with `for > 0` fire **only after several evaluations in a row**, if the expression is true every time.
|
||||
|
||||
If evaluation returns error (i.e. datasource is unavailable), alert state doesn't change.
|
||||
@@ -991,14 +997,14 @@ If at least one evaluation returns no data, then alert's `for` state resets.
|
||||
|
||||
If `-remoteWrite.url` command-line flag is configured, vmalert will [persist alert's state](https://docs.victoriametrics.com/victoriametrics/vmalert/#alerts-state-on-restarts)
|
||||
in form of time series `ALERTS` and `ALERTS_FOR_STATE` to the specified destination. Such time series can be then queried via
|
||||
[vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui) or Grafana to track how
|
||||
[vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui) or Grafana to track how
|
||||
alerts state changed in time. See [query statistics dashboard](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/dashboards/alert-statistics.json) as example for tracking historical alerts state.
|
||||
|
||||
### Data delay
|
||||
|
||||
Data delay is one of the most common problems when running rules.
|
||||
|
||||
vmalert runs the configured rules at specific timestamps.
|
||||
vmalert runs the configured rules at specific timestamps.
|
||||
It expects that the needed data is already available in the configured `-datasource.url` **at the time** the rule is evaluated.
|
||||
|
||||

|
||||
@@ -1011,8 +1017,8 @@ may get an empty response from the datasource, produce empty recording rules or
|
||||
> Please note, data delay is inevitable in distributed systems. And it is better to account for it rather than ignore it.
|
||||
|
||||
By default, recently written samples to VictoriaMetrics [aren't visible for queries](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#query-latency)
|
||||
for up to `30s` (see `-search.latencyOffset` command-line flag at vmselect or VictoriaMetrics single-node).
|
||||
Such delay is needed to eliminate the risk of incomplete data on the moment of querying. To compensate the latency in
|
||||
for up to `30s` (see `-search.latencyOffset` command-line flag at vmselect or VictoriaMetrics single-node).
|
||||
Such delay is needed to eliminate the risk of incomplete data at the moment of querying. To compensate for the latency in
|
||||
timestamps for produced evaluation results, `-rule.evalDelay` is also set to `30s` by default.
|
||||
If you expect data to be delayed for longer intervals (it gets buffered, queued, or just network is slow sometimes),
|
||||
or you changed default value of `-search.latencyOffset` - consider increasing the `-rule.evalDelay` value accordingly.
|
||||
@@ -1024,6 +1030,7 @@ See [common mistakes](#common-mistakes) for recommendations for dealing with spo
|
||||
vmalert allows configuring more detailed logging for specific rule starting from {{% available_from "v1.116.0" %}}.
|
||||
Or for all rules within the [group](#groups) {{% available_from "v1.117.0" %}}.
|
||||
Just set `debug: true` in configuration and vmalert will start printing additional log messages:
|
||||
|
||||
```sh
|
||||
2022-09-15T13:35:41.155Z DEBUG alerting rule "TestGroup":"Conns" (2601299393013563564) at 2022-09-15T15:35:41+02:00: query returned 0 series (elapsed: 5.896041ms, isPartial: false)
|
||||
2022-09-15T13:35:56.149Z DEBUG datasource request: executing POST request with params "denyPartialResponse=true&query=sum%28vm_tcplistener_conns%7Binstance%3D%22localhost%3A8429%22%7D%29+by%28instance%29+%3E+0&step=15s&time=1663248945"
|
||||
@@ -1041,15 +1048,17 @@ Sensitive info is stripped from the `curl` examples - see [security](#security)
|
||||
Transient alerts that change state from inactive to firing too frequently are called **flapping alerts**.
|
||||
|
||||
The following expression will show how many times a specific rule switched its state over last 24h:
|
||||
|
||||
```promql
|
||||
max(changes(vmalert_alerts_firing[24h])) by(group, alertname) > 0
|
||||
```
|
||||
|
||||
How to reduce the chance for a rule to flap:
|
||||
|
||||
1. Use the `for <interval>:` setting with a value much larger than the `scrape_interval` for the series used in the expression.
|
||||
Note, the larger the `for` value, the longer it takes for the alert to fire.
|
||||
1. Set a `[lookbehind-window]` in rollup expressions (i.e. `rate(http_errors_total[<lookbehind-window>]) > 0`) to at least **2× the `scrape_interval`** for the selected series.
|
||||
1. Use `keep_firing_for: <interval>` to delay alert resolution if the expression stops returning data.
|
||||
1. Use `keep_firing_for: <interval>` to delay alert resolution if the expression stops returning data.
|
||||
For example, for short CPU spikes, you may want to keep the alert active until CPU usage stays low for 5 minutes in a row.
|
||||
1. Make sure your rule can handle possible [data delays](#data-delay) from the datasource.
|
||||
1. Review the metric's past behavior to set a threshold that avoids triggering alerts too easily.
|
||||
@@ -1058,23 +1067,25 @@ See [common mistakes](#common-mistakes) for rules config.
|
||||
|
||||
### Never-firing alerts
|
||||
|
||||
vmalert can detect{{% available_from "v1.91.0" %}} if alert's expression doesn't match any time series in runtime.
|
||||
vmalert can detect {{% available_from "v1.91.0" %}} if alert's expression doesn't match any time series in runtime.
|
||||
This problem usually happens when alerting expression selects time series which aren't present in the datasource (i.e. wrong `job` label)
|
||||
or there is a typo in the series selector (i.e. `env=prodd`). Such alerting rules will be marked with special icon in
|
||||
or there is a typo in the series selector (i.e. `env=prodd`). Such alerting rules will be marked with special icon in
|
||||
vmalert UI and exposed via `vmalert_alerting_rules_last_evaluation_series_fetched` metric. The metric value will
|
||||
show how many time series were matched before the filtering by rule's expression. If metric value is `-1`, then
|
||||
this feature is not supported by the datasource (old versions of VictoriaMetrics). The following expression can be
|
||||
used to detect rules matching no series:
|
||||
|
||||
```
|
||||
max(vmalert_alerting_rules_last_evaluation_series_fetched) by(group, alertname) == 0
|
||||
```
|
||||
|
||||
See more details [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4039) and
|
||||
See more details in this [GitHub Issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4039) and
|
||||
read [Never-firing alerts](https://victoriametrics.com/blog/never-firing-alerts/) blogpost.
|
||||
|
||||
### Series with the same labelset
|
||||
|
||||
vmalert can produce the following error message:
|
||||
|
||||
```
|
||||
result contains metrics with the same labelset during evaluation
|
||||
```
|
||||
@@ -1083,6 +1094,7 @@ The error means there is a collision between [time series](https://docs.victoria
|
||||
during evaluation.
|
||||
|
||||
For example, a rule with `expr: {__name__=~"vmalert_alerts_.*"} > 0` returns two distinct time series in response:
|
||||
|
||||
```
|
||||
{__name__="vmalert_alerts_pending",job="vmalert",alertname="HostContextSwitching"} 12
|
||||
{__name__="vmalert_alerts_firing",job="vmalert",alertname="HostContextSwitching"} 0
|
||||
@@ -1098,7 +1110,7 @@ since it is expected it runs in an isolated trusted network.
|
||||
[Enterprise version of vmalert](https://docs.victoriametrics.com/victoriametrics/enterprise/) supports the ability to accept [mTLS](https://en.wikipedia.org/wiki/Mutual_authentication)
|
||||
requests at this port, by specifying `-tls` and `-mtls` command-line flags. For example, the following command runs `vmalert`, which accepts only mTLS requests at port `8880`:
|
||||
|
||||
```
|
||||
```sh
|
||||
./vmalert -tls -mtls -remoteWrite.url=...
|
||||
```
|
||||
|
||||
@@ -1107,11 +1119,11 @@ It is possible to specify custom TLS Root CA via `-mtlsCAFile` command-line flag
|
||||
|
||||
## Security
|
||||
|
||||
See general recommendations regarding security [here](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#security).
|
||||
See general recommendations regarding [security](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#security).
|
||||
|
||||
vmalert [web UI](#web) exposes configuration details such as list of [Groups](#groups), active alerts,
|
||||
vmalert [web UI](#web) exposes configuration details such as list of [Groups](#groups), active alerts,
|
||||
[alerts state](#alerts-state-on-restarts), [notifiers](#notifier-configuration-file). Notifier addresses (sanitized) are attached
|
||||
as labels to metrics `vmalert_alerts_sent_.*` on `http://<vmalert>/metrics` page. Consider limiting user's access
|
||||
as labels to metrics `vmalert_alerts_sent_.*` on `http://<vmalert>/metrics` page. Consider limiting user's access
|
||||
to the web UI or `/metrics` page if this information is sensitive.
|
||||
|
||||
[Alerts state](#alerts-state-on-restarts) page or [debug mode](#debug-mode) could emit additional information about configured
|
||||
@@ -1126,20 +1138,16 @@ See also [mTLS protection docs](#mtls-protection).
|
||||
|
||||
* Memory profile. It can be collected with the following command (replace `0.0.0.0` with hostname if needed):
|
||||
|
||||
|
||||
```sh
|
||||
curl http://0.0.0.0:8880/debug/pprof/heap > mem.pprof
|
||||
```
|
||||
|
||||
|
||||
* CPU profile. It can be collected with the following command (replace `0.0.0.0` with hostname if needed):
|
||||
|
||||
|
||||
```sh
|
||||
curl http://0.0.0.0:8880/debug/pprof/profile > cpu.pprof
|
||||
```
|
||||
|
||||
|
||||
The command for collecting CPU profile waits for 30 seconds before returning.
|
||||
|
||||
The collected profiles may be analyzed with [go tool pprof](https://github.com/google/pprof).
|
||||
@@ -1188,9 +1196,10 @@ the latter will have higher priority.
|
||||
|
||||
For chaining groups, they must be executed in a specific order, and the next group should be executed after
|
||||
the results from previous group are available in the datasource.
|
||||
In `vmalert`, user can specify `eval_offset` to achieve that{{% available_from "v1.113.0" %}}.
|
||||
In `vmalert`, user can specify `eval_offset` to achieve that {{% available_from "v1.113.0" %}}.
|
||||
|
||||
For example:
|
||||
|
||||
```yaml
|
||||
groups:
|
||||
- name: BaseGroup
|
||||
@@ -1222,29 +1231,33 @@ groups:
|
||||
- record: http_server_request_duration_seconds:sum_rate:5m:merged
|
||||
expr: |
|
||||
http_server_request_duration_seconds:sum_rate:5m:http_get
|
||||
or
|
||||
or
|
||||
http_server_request_duration_seconds:sum_rate:5m:http_post
|
||||
```
|
||||
|
||||
This configuration ensures that rules in `BaseGroup` are executed at(assuming vmalert starts at `12:00:00`):
|
||||
This configuration ensures that rules in `BaseGroup` are executed at (assuming vmalert starts at `12:00:00`):
|
||||
|
||||
```
|
||||
[12:00:10, 12:01:10, 12:02:10, 12:03:10...]
|
||||
```
|
||||
|
||||
while rules in group `TopGroup` are executed at:
|
||||
|
||||
```
|
||||
[12:00:40, 12:01:40, 12:02:40, 12:03:40...]
|
||||
```
|
||||
|
||||
As a result, `TopGroup` always gets the latest results of `BaseGroup`.
|
||||
|
||||
By default, the `eval_offset` values should be at least 30 seconds apart to accommodate the
|
||||
`-search.latencyOffset(default 30s)` command-line flag at vmselect or VictoriaMetrics single-node.
|
||||
By default, the `eval_offset` values should be at least 30 seconds apart to accommodate the
|
||||
`-search.latencyOffset(default 30s)` command-line flag at vmselect or VictoriaMetrics single-node.
|
||||
The minimum `eval_offset` gap can be adjusted accordingly with `-search.latencyOffset`.
|
||||
|
||||
### Notifier configuration file
|
||||
|
||||
Notifier also supports configuration via file specified with flag `notifier.config`:
|
||||
|
||||
```
|
||||
```sh
|
||||
./bin/vmalert -rule=app/vmalert/config/testdata/rules.good.rules \
|
||||
-datasource.url=http://localhost:8428 \
|
||||
-notifier.config=app/vmalert/notifier/testdata/consul.good.yaml
|
||||
@@ -1326,7 +1339,7 @@ bearer_token_file: <string>
|
||||
# see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#oauth2
|
||||
oauth2:
|
||||
[ <oauth2_config> ]
|
||||
|
||||
|
||||
# Optional list of HTTP headers in form `header-name: value`
|
||||
# applied for all requests to notifiers
|
||||
# For example:
|
||||
@@ -1395,6 +1408,7 @@ It is recommended using
|
||||
|
||||
You can build `vmalert` docker image from source and push it to your own docker repository.
|
||||
Run the following commands from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics):
|
||||
|
||||
```sh
|
||||
make package-vmalert
|
||||
docker tag victoria-metrics/vmalert:version my-repo:my-version-name
|
||||
|
||||
@@ -6,7 +6,7 @@ build:
|
||||
sitemap:
|
||||
disable: true
|
||||
---
|
||||
<!-- The file has to be manually updated during feature work in PR, make docs-update-flags command could be used peridically to ensure the flags in sync. -->
|
||||
<!-- The file has to be manually updated during feature work in PR, make docs-update-flags command could be used periodically to ensure the flags are in sync. -->
|
||||
```shellhelp
|
||||
|
||||
vmalert processes alerts and recording rules.
|
||||
@@ -450,6 +450,8 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/vmalert/ .
|
||||
Limits the maximum duration for automatic alert expiration, which by default is 4 times evaluationInterval of the parent group
|
||||
-rule.resendDelay duration
|
||||
Minimum amount of time to wait before resending an alert to notifier.
|
||||
-rule.resultsLimit int
|
||||
Limits the number of alerts or recording results a single rule can produce. Can be overridden by the limit option under group if specified. If exceeded, the rule will be marked with an error and all its results will be discarded. 0 means no limit. (default 0)
|
||||
-rule.stripFilePath
|
||||
Whether to strip file path in responses from the api/v1/rules API for files configured via -rule cmd-line flag. For example, the file path '/path/to/tenant_id/rules.yml' will be stripped to just 'rules.yml'. This flag might be useful to hide sensitive information in file path such as tenant ID. This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/victoriametrics/enterprise/
|
||||
-rule.templates array
|
||||
|
||||
@@ -12,9 +12,7 @@ aliases:
|
||||
- /vmauth/index.html
|
||||
- /vmauth/
|
||||
---
|
||||
`vmauth` is an HTTP proxy, which can [authorize](https://docs.victoriametrics.com/victoriametrics/vmauth/#authorization), [route](https://docs.victoriametrics.com/victoriametrics/vmauth/#routing)
|
||||
and [load balance](https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing) requests across [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) components
|
||||
or any other HTTP backends.
|
||||
`vmauth` is an HTTP proxy, which can [authorize](https://docs.victoriametrics.com/victoriametrics/vmauth/#authorization), [route](https://docs.victoriametrics.com/victoriametrics/vmauth/#routing) and [load balance](https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing) requests across [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) components or any other HTTP backends.
|
||||
|
||||
## Quick start
|
||||
|
||||
@@ -33,12 +31,11 @@ The port can be modified via `-httpListenAddr` command-line flag.
|
||||
See [how to reload config without restart](#config-reload).
|
||||
|
||||
Docker images for `vmauth` are available at [Docker Hub](https://hub.docker.com/r/victoriametrics/vmauth/tags) and [Quay](https://quay.io/repository/victoriametrics/vmauth?tab=tags).
|
||||
See how `vmauth` used in [docker-compose env](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/README.md#victoriametrics-cluster).
|
||||
See how `vmauth` is used in [docker-compose env](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/README.md#victoriametrics-cluster).
|
||||
|
||||
Pass `-help` to `vmauth` in order to see all the supported command-line flags with their descriptions.
|
||||
|
||||
Feel free [contacting us](mailto:info@victoriametrics.com) if you need customized auth proxy for VictoriaMetrics with the support of LDAP, SSO, RBAC, SAML,
|
||||
accounting and rate limiting such as [vmgateway](https://docs.victoriametrics.com/victoriametrics/vmgateway/).
|
||||
Feel free to [contact us](mailto:info@victoriametrics.com) if you need customized auth proxy for VictoriaMetrics with the support of LDAP, SSO, RBAC, SAML, accounting and rate limiting such as [vmgateway](https://docs.victoriametrics.com/victoriametrics/vmgateway/).
|
||||
|
||||
## Use cases
|
||||
|
||||
@@ -75,11 +72,11 @@ See also [authorization](#authorization) and [routing](#routing) docs.
|
||||
|
||||
For example, the following [`-auth.config`](#auth-config) instructs `vmauth` to make the following:
|
||||
|
||||
- Requests starting with `/app1/` are proxied to `http://app1-backend/`, while the `/app1/` path prefix is dropped according to [`drop_src_path_prefix_parts`](#dropping-request-path-prefix).
|
||||
* Requests starting with `/app1/` are proxied to `http://app1-backend/`, while the `/app1/` path prefix is dropped according to [`drop_src_path_prefix_parts`](#dropping-request-path-prefix).
|
||||
For example, the request to `http://vmauth:8427/app1/foo/bar?baz=qwe` is proxied to `http://app1-backend/foo/bar?baz=qwe`.
|
||||
- Requests starting with `/app2/` are proxied to `http://app2-backend/`, while the `/app2/` path prefix is dropped according to [`drop_src_path_prefix_parts`](#dropping-request-path-prefix).
|
||||
* Requests starting with `/app2/` are proxied to `http://app2-backend/`, while the `/app2/` path prefix is dropped according to [`drop_src_path_prefix_parts`](#dropping-request-path-prefix).
|
||||
For example, the request to `http://vmauth:8427/app2/index.html` is proxied to `http://app2-backend/index.html`.
|
||||
- Other requests are proxied to `http://default-backed/`.
|
||||
* Other requests are proxied to `http://default-backed/`.
|
||||
|
||||
```yaml
|
||||
unauthorized_user:
|
||||
@@ -96,7 +93,7 @@ unauthorized_user:
|
||||
```
|
||||
|
||||
Sometimes it is needed to proxy all the requests, which do not match `url_map`, to a special `404` page, which could count invalid requests.
|
||||
Use `default_url` for this case. For example, the following [`-auth.config`](#auth-config) instructs `vmauth` sending all the requests,
|
||||
Use `default_url` for this case. For example, the following [`-auth.config`](#auth-config) instructs `vmauth` to send all the requests,
|
||||
which do not match `url_map`, to the `http://some-backend/404-page.html` page. The requested path is passed via `request_path` query arg.
|
||||
For example, the request to `http://vmauth:8427/foo/bar?baz=qwe` is proxied to `http://some-backend/404-page.html?request_path=%2Ffoo%2Fbar%3Fbaz%3Dqwe`.
|
||||
|
||||
@@ -137,8 +134,7 @@ See also [authorization](#authorization) and [routing](#routing) docs.
|
||||
|
||||
### Load balancer for vmagent
|
||||
|
||||
If [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) is used for processing [data push requests](https://docs.victoriametrics.com/victoriametrics/vmagent/#how-to-push-data-to-vmagent),
|
||||
then it is possible to scale the performance of data processing at `vmagent` by spreading load among multiple identically configured `vmagent` instances.
|
||||
If [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) is used for processing [data push requests](https://docs.victoriametrics.com/victoriametrics/vmagent/#how-to-push-data-to-vmagent), then it is possible to scale the performance of data processing at `vmagent` by spreading the load among multiple identically configured `vmagent` instances.
|
||||
This can be done with the following [config](#auth-config) for `vmauth`:
|
||||
|
||||
```yaml
|
||||
@@ -161,8 +157,7 @@ See also [authorization](#authorization) and [routing](#routing) docs.
|
||||
|
||||
### Load balancer for VictoriaMetrics cluster
|
||||
|
||||
[VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) accepts incoming data via `vminsert` nodes
|
||||
and processes incoming requests via `vmselect` nodes according to [these docs](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#architecture-overview).
|
||||
[VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) accepts incoming data via `vminsert` nodes and processes incoming requests via `vmselect` nodes according to [these docs](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#architecture-overview).
|
||||
`vmauth` can be used for balancing both `insert` and `select` requests among `vminsert` and `vmselect` nodes, when the following [`-auth.config`](#auth-config) is used:
|
||||
|
||||
```yaml
|
||||
@@ -189,8 +184,7 @@ See also [authorization](#authorization) and [routing](#routing) docs.
|
||||
### High availability
|
||||
|
||||
`vmauth` automatically switches from temporarily unavailable backend to other hot standby backends listed in `url_prefix`
|
||||
if it runs with `-loadBalancingPolicy=first_available` command-line flag. The load balancing policy can be overridden at `user` and `url_map` sections
|
||||
of [`-auth.config`](#auth-config) via `load_balancing_policy` option. For example, the following config instructs `vmauth` to proxy requests to `http://victoria-metrics-main:8428/` backend.
|
||||
if it runs with `-loadBalancingPolicy=first_available` command-line flag. The load balancing policy can be overridden at `user` and `url_map` sections of [`-auth.config`](#auth-config) via `load_balancing_policy` option. For example, the following config instructs `vmauth` to proxy requests to `http://victoria-metrics-main:8428/` backend.
|
||||
If this backend becomes unavailable, then `vmauth` starts proxying requests to `http://victoria-metrics-standby1:8428/`.
|
||||
If this backend also becomes unavailable, then requests are proxied to the last specified backend - `http://victoria-metrics-standby2:8428/`:
|
||||
|
||||
@@ -211,7 +205,7 @@ See also [authorization](#authorization) and [routing](#routing) docs.
|
||||
|
||||
`vmauth` can terminate HTTPS requests to backend services when it runs with the following command-line flags:
|
||||
|
||||
```
|
||||
```sh
|
||||
/path/to/vmauth -tls -tlsKeyFile=/path/to/tls_key_file -tlsCertFile=/path/to/tls_cert_file -httpListenAddr=0.0.0.0:443
|
||||
```
|
||||
|
||||
@@ -255,9 +249,7 @@ See also [authorization](#authorization), [routing](#routing) and [load balancin
|
||||
|
||||
### Per-tenant authorization
|
||||
|
||||
The following [`-auth.config`](#auth-config) instructs proxying `insert` and `select` requests from the [Basic Auth](https://en.wikipedia.org/wiki/Basic_access_authentication)
|
||||
user `tenant1` to the [tenant](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy) `1`,
|
||||
while requests from the user `tenant2` are sent to tenant `2`:
|
||||
The following [`-auth.config`](#auth-config) instructs `vmauth` to proxy `insert` and `select` requests from the [Basic Auth](https://en.wikipedia.org/wiki/Basic_access_authentication) user `tenant1` to the [tenant](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy) `1`, while requests from the user `tenant2` are sent to tenant `2`:
|
||||
|
||||
```yaml
|
||||
users:
|
||||
@@ -300,9 +292,7 @@ to different backends depending on the following [subject fields](https://en.wik
|
||||
* `organization` aka `O`
|
||||
* `common_name` aka `CN`
|
||||
|
||||
For example, the following [`-auth.config`](#auth-config) routes requests from clients with `organizational_unit: finance` TLS certificates
|
||||
to `http://victoriametrics-finance:8428` backend, while requests from clients with `organizational_unit: devops` TLS certificates
|
||||
are routed to `http://victoriametrics-devops:8428` backend:
|
||||
For example, the following [`-auth.config`](#auth-config) routes requests from clients with `organizational_unit: finance` TLS certificates to `http://victoriametrics-finance:8428` backend, while requests from clients with `organizational_unit: devops` TLS certificates are routed to `http://victoriametrics-devops:8428` backend:
|
||||
|
||||
```yaml
|
||||
users:
|
||||
@@ -321,8 +311,7 @@ See also [authorization](#authorization), [routing](#routing) and [load balancin
|
||||
### Enforcing query args
|
||||
|
||||
`vmauth` can be configured for adding some mandatory query args before proxying requests to backends.
|
||||
For example, the following [config](#auth-config) adds [`extra_label`](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#prometheus-querying-api-enhancements)
|
||||
to all the requests, which are proxied to [single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/):
|
||||
For example, the following [config](#auth-config) adds [`extra_label`](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#prometheus-querying-api-enhancements) to all the requests, which are proxied to [single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/):
|
||||
|
||||
```yaml
|
||||
unauthorized_user:
|
||||
@@ -333,12 +322,10 @@ See also [authorization](#authorization), [routing](#routing) and [load balancin
|
||||
|
||||
## Dropping request path prefix
|
||||
|
||||
By default `vmauth` doesn't drop the path prefix from the original request when proxying the request to the matching backend.
|
||||
Sometimes it is needed to drop path prefix before proxying the request to the backend. This can be done by specifying the number of `/`-delimited
|
||||
prefix parts to drop from the request path via `drop_src_path_prefix_parts` option at `url_map` level or at `user` level or [`-auth.config`](#auth-config).
|
||||
By default, `vmauth` doesn't drop the path prefix from the original request when proxying the request to the matching backend.
|
||||
Sometimes it is needed to drop path prefix before proxying the request to the backend. This can be done by specifying the number of `/`-delimited prefix parts to drop from the request path via `drop_src_path_prefix_parts` option at `url_map` level or at `user` level or [`-auth.config`](#auth-config).
|
||||
|
||||
For example, if you need serving requests to [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) at `/vmalert/` path prefix,
|
||||
while serving requests to [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) at `/vmagent/` path prefix,
|
||||
For example, if you need to serve requests to [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) at `/vmalert/` path prefix, while serving requests to [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) at `/vmagent/` path prefix,
|
||||
then the following [-auth.config](#auth-config) can be used:
|
||||
|
||||
```yaml
|
||||
@@ -366,11 +353,11 @@ unauthorized_user:
|
||||
|
||||
`vmauth` supports the following authorization mechanisms:
|
||||
|
||||
- [No authorization](https://docs.victoriametrics.com/victoriametrics/vmauth/#simple-http-proxy)
|
||||
- [Basic Auth](https://docs.victoriametrics.com/victoriametrics/vmauth/#basic-auth-proxy)
|
||||
- [Bearer token](https://docs.victoriametrics.com/victoriametrics/vmauth/#bearer-token-auth-proxy)
|
||||
- [Client TLS certificate verification aka mTLS](https://docs.victoriametrics.com/victoriametrics/vmauth/#mtls-based-request-routing)
|
||||
- [Auth tokens via Arbitrary HTTP request headers](https://docs.victoriametrics.com/victoriametrics/vmauth/#reading-auth-tokens-from-other-http-headers)
|
||||
* [No authorization](https://docs.victoriametrics.com/victoriametrics/vmauth/#simple-http-proxy)
|
||||
* [Basic Auth](https://docs.victoriametrics.com/victoriametrics/vmauth/#basic-auth-proxy)
|
||||
* [Bearer token](https://docs.victoriametrics.com/victoriametrics/vmauth/#bearer-token-auth-proxy)
|
||||
* [Client TLS certificate verification aka mTLS](https://docs.victoriametrics.com/victoriametrics/vmauth/#mtls-based-request-routing)
|
||||
* [Auth tokens via Arbitrary HTTP request headers](https://docs.victoriametrics.com/victoriametrics/vmauth/#reading-auth-tokens-from-other-http-headers)
|
||||
|
||||
See also [security docs](#security), [routing docs](#routing) and [load balancing docs](#load-balancing).
|
||||
|
||||
@@ -378,11 +365,11 @@ See also [security docs](#security), [routing docs](#routing) and [load balancin
|
||||
|
||||
`vmauth` can proxy requests to different backends depending on the following parts of HTTP request:
|
||||
|
||||
- [Request path](#routing-by-path)
|
||||
- [Request host](#routing-by-host)
|
||||
- [Request query arg](#routing-by-query-arg)
|
||||
- [HTTP request header](#routing-by-header)
|
||||
- [Multiple parts](#routing-by-multiple-parts)
|
||||
* [Request path](#routing-by-path)
|
||||
* [Request host](#routing-by-host)
|
||||
* [Request query arg](#routing-by-query-arg)
|
||||
* [HTTP request header](#routing-by-header)
|
||||
* [Multiple parts](#routing-by-multiple-parts)
|
||||
|
||||
See also [authorization](#authorization) and [load balancing](#load-balancing).
|
||||
For debug purposes, extra logging for failed requests can be enabled by setting `dump_request_on_errors: true` {{% available_from "v1.107.0" %}} on user level. Please note, such logging may expose sensitive info and is recommended to use only for debugging.
|
||||
@@ -407,8 +394,7 @@ unauthorized_user:
|
||||
default_url: http://some-backend/404-page.html
|
||||
```
|
||||
|
||||
`src_paths` accepts a list of [regular expressions](https://github.com/google/re2/wiki/Syntax). The incoming request is routed to the given `url_prefix`
|
||||
if **the whole** requested path matches at least one `src_paths` entry.
|
||||
`src_paths` accepts a list of [regular expressions](https://github.com/google/re2/wiki/Syntax). The incoming request is routed to the given `url_prefix` if **the whole** requested path matches at least one `src_paths` entry.
|
||||
|
||||
See also [how to drop request path prefix](#dropping-request-path-prefix).
|
||||
|
||||
@@ -416,8 +402,7 @@ See also [how to drop request path prefix](#dropping-request-path-prefix).
|
||||
|
||||
`src_hosts` option can be specified inside `url_map` in order to route requests by host header.
|
||||
|
||||
The following [`-auth.config`](#auth-config) routes requests to `app1.my-host.com` host to `http://app1-backend`, while routing requests to `app2.my-host.com` host to `http://app2-backend`,
|
||||
and the rest of requests are routed to `http://some-backend/404-page.html`:
|
||||
The following [`-auth.config`](#auth-config) routes requests to `app1.my-host.com` host to `http://app1-backend`, while routing requests to `app2.my-host.com` host to `http://app2-backend`, and the rest of requests are routed to `http://some-backend/404-page.html`:
|
||||
|
||||
```yaml
|
||||
unauthorized_user:
|
||||
@@ -431,15 +416,13 @@ unauthorized_user:
|
||||
default_url: http://some-backend/404-page.html
|
||||
```
|
||||
|
||||
`src_hosts` accepts a list of [regular expressions](https://github.com/google/re2/wiki/Syntax). The incoming request is routed to the given `url_prefix`
|
||||
if **the whole** request host matches at least one `src_hosts` entry.
|
||||
`src_hosts` accepts a list of [regular expressions](https://github.com/google/re2/wiki/Syntax). The incoming request is routed to the given `url_prefix` if **the whole** request host matches at least one `src_hosts` entry.
|
||||
|
||||
### Routing by query arg
|
||||
|
||||
`src_query_args` option can be specified inside `url_map` in order to route requests by the given [query arg](https://en.wikipedia.org/wiki/Query_string).
|
||||
|
||||
For example, the following [`-auth.config`](#auth-config) routes requests to `http://app1-backend/` if `db=foo` query arg is present in the request,
|
||||
while routing requests with `db` query arg starting with `bar` to `http://app2-backend`, and the rest of requests are routed to `http://some-backend/404-page.html`:
|
||||
For example, the following [`-auth.config`](#auth-config) routes requests to `http://app1-backend/` if `db=foo` query arg is present in the request, while routing requests with `db` query arg starting with `bar` to `http://app2-backend`, and the rest of requests are routed to `http://some-backend/404-page.html`:
|
||||
|
||||
```yaml
|
||||
unauthorized_user:
|
||||
@@ -451,7 +434,8 @@ unauthorized_user:
|
||||
default_url: http://some-backend/404-page.html
|
||||
```
|
||||
|
||||
`src_query_args` accepts a list of strings in the format `arg=value` or `arg=~regex`. The `arg=value` format means exact matching of **the whole** `arg` query arg value to the given `value`.
|
||||
`src_query_args` accepts a list of strings in the format `arg=value` or `arg=~regex`.
|
||||
The `arg=value` format means exact matching of **the whole** `arg` query arg value to the given `value`.
|
||||
The `arg=~regex` format means regex matching of **the whole** `arg` query arg value to the given `regex`.
|
||||
If at least a single query arg in the request matches at least one `src_query_args` entry, then the request is routed to the given `url_prefix`.
|
||||
|
||||
@@ -459,8 +443,7 @@ If at least a single query arg in the request matches at least one `src_query_ar
|
||||
|
||||
`src_headers` option can be specified inside `url_map` in order to route requests by the given HTTP request header.
|
||||
|
||||
For example, the following [`-auth.config`](#auth-config) routes requests to `http://app1-backend` if `TenantID` request header equals to `42`, while routing requests to `http://app2-backend`
|
||||
if `TenantID` request header equals to `123:456`, and the rest of requests are routed to `http://some-backend/404-page.html`:
|
||||
For example, the following [`-auth.config`](#auth-config) routes requests to `http://app1-backend` if `TenantID` request header equals to `42`, while routing requests to `http://app2-backend` if `TenantID` request header equals to `123:456`, and the rest of requests are routed to `http://some-backend/404-page.html`:
|
||||
|
||||
```yaml
|
||||
unauthorized_user:
|
||||
@@ -476,16 +459,14 @@ If `src_headers` contains multiple entries, then it is enough to match only a si
|
||||
|
||||
### Routing by multiple parts
|
||||
|
||||
Any subset of [`src_paths`](#routing-by-path), [`src_hosts`](#routing-by-host), [`src_query_args`](#routing-by-query-arg) and [`src_headers`](#routing-by-header)
|
||||
options can be specified simultaneously in a single `url_map` entry. In this case the request is routed to the given `url_prefix` if the request matches
|
||||
all the provided configs **simultaneously**.
|
||||
Any subset of [`src_paths`](#routing-by-path), [`src_hosts`](#routing-by-host), [`src_query_args`](#routing-by-query-arg) and [`src_headers`](#routing-by-header) options can be specified simultaneously in a single `url_map` entry. In this case the request is routed to the given `url_prefix` if the request matches all the provided configs **simultaneously**.
|
||||
|
||||
For example, the following [`-auth.config`](#auth-config) routes requests to `http://app1-backend` if all the conditions mentioned below are simultaneously met:
|
||||
|
||||
- the request path starts with `/app/`
|
||||
- the requested hostname ends with `.bar.baz`
|
||||
- the request contains `db=abc` query arg
|
||||
- the `TenantID` request header equals to `42`
|
||||
* the request path starts with `/app/`
|
||||
* the requested hostname ends with `.bar.baz`
|
||||
* the request contains `db=abc` query arg
|
||||
* the `TenantID` request header equals to `42`
|
||||
|
||||
```yaml
|
||||
unauthorized_user:
|
||||
@@ -501,7 +482,7 @@ unauthorized_user:
|
||||
|
||||
Each `url_prefix` in the [-auth.config](#auth-config) can be specified in the following forms:
|
||||
|
||||
- A single url. For example:
|
||||
* A single url. For example:
|
||||
|
||||
```yaml
|
||||
unauthorized_user:
|
||||
@@ -510,7 +491,7 @@ Each `url_prefix` in the [-auth.config](#auth-config) can be specified in the fo
|
||||
|
||||
In this case `vmauth` proxies requests to the specified url.
|
||||
|
||||
- A list of urls. For example:
|
||||
* A list of urls. For example:
|
||||
|
||||
```yaml
|
||||
unauthorized_user:
|
||||
@@ -527,8 +508,7 @@ Each `url_prefix` in the [-auth.config](#auth-config) can be specified in the fo
|
||||
`vmauth` automatically detects temporarily unavailable backends and spreads incoming queries among the remaining available backends.
|
||||
This allows restarting the backends and performing maintenance tasks on the backends without the need to remove them from the `url_prefix` list.
|
||||
|
||||
By default `vmauth` returns backend responses with all the http status codes to the client. It is possible to configure automatic retry of requests
|
||||
at other backends if the backend responds with status code specified in the `-retryStatusCodes` command-line flag.
|
||||
By default, `vmauth` returns backend responses with all the http status codes to the client. It is possible to configure automatic retry of requests at other backends if the backend responds with status code specified in the `-retryStatusCodes` command-line flag.
|
||||
It is possible to customize the list of http response status codes to retry via `retry_status_codes` list at `user` and `url_map` level of [`-auth.config`](#auth-config).
|
||||
For example, the following config re-tries requests on other backends if the current backend returns response with `500` or `502` HTTP status code:
|
||||
|
||||
@@ -541,10 +521,8 @@ Each `url_prefix` in the [-auth.config](#auth-config) can be specified in the fo
|
||||
retry_status_codes: [500, 502]
|
||||
```
|
||||
|
||||
By default `vmauth` uses `least_loaded` policy for spreading incoming requests among available backends.
|
||||
The policy can be changed to `first_available` via `-loadBalancingPolicy` command-line flag. In this case `vmauth`
|
||||
sends all the requests to the first specified backend while it is available. `vmauth` starts sending requests to the next
|
||||
specified backend when the first backend is temporarily unavailable.
|
||||
By default, `vmauth` uses `least_loaded` policy to spread the incoming requests among available backends.
|
||||
The policy can be changed to `first_available` via `-loadBalancingPolicy` command-line flag. In this case `vmauth` sends all the requests to the first specified backend while it is available. `vmauth` starts sending requests to the next specified backend when the first backend is temporarily unavailable.
|
||||
It is possible to customize the load balancing policy at the `user` and `url_map` level.
|
||||
For example, the following config specifies `first_available` load balancing policy for unauthorized requests:
|
||||
|
||||
@@ -558,9 +536,8 @@ Each `url_prefix` in the [-auth.config](#auth-config) can be specified in the fo
|
||||
|
||||
Load balancing feature can be used in the following cases:
|
||||
|
||||
- Balancing the load among multiple `vmselect` and/or `vminsert` nodes in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/).
|
||||
The following [`-auth.config`](#auth-config) can be used for spreading incoming requests among 3 vmselect nodes and re-trying failed requests
|
||||
or requests with 500 and 502 response status codes:
|
||||
* Balancing the load among multiple `vmselect` and/or `vminsert` nodes in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/).
|
||||
The following [`-auth.config`](#auth-config) can be used to spread incoming requests among 3 vmselect nodes and to re-try failed requests or requests with 500 and 502 response status codes:
|
||||
|
||||
```yaml
|
||||
unauthorized_user:
|
||||
@@ -571,11 +548,9 @@ Load balancing feature can be used in the following cases:
|
||||
retry_status_codes: [500, 502]
|
||||
```
|
||||
|
||||
- Sending select queries to the closest availability zone (AZ), while falling back to other AZs with identical data if the closest AZ is unavailable.
|
||||
For example, the following [`-auth.config`](#auth-config) sends select queries to `https://vmselect-az1/` and uses the `https://vmselect-az2/` as a fallback
|
||||
when `https://vmselect-az1/` is temporarily unavailable or cannot return full responses.
|
||||
See [these docs](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#cluster-availability) for details about `deny_partial_response` query arg,
|
||||
which is added to requests before they are proxied to backends.
|
||||
* Sending select queries to the closest availability zone (AZ), while falling back to other AZs with identical data if the closest AZ is unavailable.
|
||||
For example, the following [`-auth.config`](#auth-config) sends select queries to `https://vmselect-az1/` and uses the `https://vmselect-az2/` as a fallback when `https://vmselect-az1/` is temporarily unavailable or cannot return full responses.
|
||||
See [these docs](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#cluster-availability) for details about `deny_partial_response` query arg, which is added to requests before they are proxied to backends.
|
||||
|
||||
```yaml
|
||||
unauthorized_user:
|
||||
@@ -592,11 +567,10 @@ See also [discovering backend IPs](#discovering-backend-ips), [authorization](#a
|
||||
|
||||
## Discovering backend IPs
|
||||
|
||||
By default `vmauth` spreads load among the listed backends at `url_prefix` as described in [load balancing docs](#load-balancing).
|
||||
By default, `vmauth` spreads load among the listed backends at `url_prefix` as described in [load balancing docs](#load-balancing).
|
||||
Sometimes multiple backend instances can be hidden behind a single hostname. For example, `vmselect-service` hostname
|
||||
may point to a cluster of `vmselect` instances in [VictoriaMetrics cluster setup](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#architecture-overview).
|
||||
So the following config may fail spreading load among available `vmselect` instances, since `vmauth` will send all the requests to the same url, which may end up
|
||||
to a single backend instance:
|
||||
So the following config may fail to spread load among available `vmselect` instances, since `vmauth` will send all the requests to the same url, which may end up at a single backend instance:
|
||||
|
||||
```yaml
|
||||
unauthorized_user:
|
||||
@@ -605,7 +579,7 @@ unauthorized_user:
|
||||
|
||||
There are the following solutions for this issue:
|
||||
|
||||
- To enumerate every `vmselect` hostname or IP in the `url_prefix` list:
|
||||
* To enumerate every `vmselect` hostname or IP in the `url_prefix` list:
|
||||
|
||||
```yaml
|
||||
unauthorized_user:
|
||||
@@ -615,10 +589,9 @@ There are the following solutions for this issue:
|
||||
- http://vmselect-3:8481/select/0/prometheus/
|
||||
```
|
||||
|
||||
This scheme works great, but it needs manual updating of the [`-auth.config`](#auth-config) every time `vmselect` services are restarted,
|
||||
downscaled or upscaled.
|
||||
This scheme works great, but it needs manual updating of the [`-auth.config`](#auth-config) every time `vmselect` services are restarted, downscaled or upscaled.
|
||||
|
||||
- To set `discover_backend_ips: true` option, so `vmauth` automatically discovers IPs behind the given hostname and then spreads load among the discovered IPs:
|
||||
* To set `discover_backend_ips: true` option, so `vmauth` automatically discovers IPs behind the given hostname and then spreads load among the discovered IPs:
|
||||
|
||||
```yaml
|
||||
unauthorized_user:
|
||||
@@ -626,8 +599,7 @@ There are the following solutions for this issue:
|
||||
discover_backend_ips: true
|
||||
```
|
||||
|
||||
If the `url_prefix` contains hostname with `srv+` prefix, then the hostname without `srv+` prefix is automatically resolved via [DNS SRV](https://en.wikipedia.org/wiki/SRV_record)
|
||||
to the list of hostnames with TCP ports, and `vmauth` balances load among the discovered TCP addresses:
|
||||
If the `url_prefix` contains hostname with `srv+` prefix, then the hostname without `srv+` prefix is automatically resolved via [DNS SRV](https://en.wikipedia.org/wiki/SRV_record) to the list of hostnames with TCP ports, and `vmauth` balances load among the discovered TCP addresses:
|
||||
|
||||
```yaml
|
||||
unauthorized_user:
|
||||
@@ -637,15 +609,13 @@ There are the following solutions for this issue:
|
||||
|
||||
This functionality is useful for balancing load among backend instances, which run on different TCP ports, since DNS SRV records contain TCP ports.
|
||||
|
||||
The `discover_backend_ips` option can be specified at `user` and `url_map` level in the [`-auth.config`](#auth-config). It can also be enabled globally
|
||||
via `-discoverBackendIPs` command-line flag.
|
||||
The `discover_backend_ips` option can be specified at `user` and `url_map` level in the [`-auth.config`](#auth-config). It can also be enabled globally via `-discoverBackendIPs` command-line flag.
|
||||
|
||||
See also [load balancing docs](#load-balancing).
|
||||
|
||||
## SRV urls
|
||||
|
||||
If `url_prefix` contains url with the hostname starting with `srv+` prefix, then `vmauth` uses [DNS SRV](https://en.wikipedia.org/wiki/SRV_record) lookup
|
||||
for the hostname without the `srv+` prefix and selects random TCP address (e.g. hostname plus TCP port) form the resolved results.
|
||||
If `url_prefix` contains url with the hostname starting with `srv+` prefix, then `vmauth` uses [DNS SRV](https://en.wikipedia.org/wiki/SRV_record) lookup for the hostname without the `srv+` prefix and selects random TCP address (i.e. hostname plus TCP port) from the resolved results.
|
||||
|
||||
For example, if `some-addr` [DNS SRV](https://en.wikipedia.org/wiki/SRV_record) record contains `some-host:12345` TCP address,
|
||||
then `url_prefix: http://srv+some-addr/some/path` is automatically resolved into `url_prefix: http://some-host:12345/some/path`.
|
||||
@@ -656,9 +626,7 @@ See also [discovering backend addresses](#discovering-backend-ips).
|
||||
## Modifying HTTP headers
|
||||
|
||||
`vmauth` supports the ability to set and remove HTTP request headers before sending the requests to backends.
|
||||
This is done via `headers` option. For example, the following [`-auth.config`](#auth-config) sets `TenantID: foobar` header
|
||||
to requests proxied to `http://backend:1234/`. It also overrides `X-Forwarded-For` request header with an empty value. This effectively
|
||||
removes the `X-Forwarded-For` header from requests proxied to `http://backend:1234/`:
|
||||
This is done via `headers` option. For example, the following [`-auth.config`](#auth-config) sets `TenantID: foobar` header to requests proxied to `http://backend:1234/`. It also overrides `X-Forwarded-For` request header with an empty value. This effectively removes the `X-Forwarded-For` header from requests proxied to `http://backend:1234/`:
|
||||
|
||||
```yaml
|
||||
unauthorized_user:
|
||||
@@ -697,10 +665,9 @@ See also [`Host` header docs](#host-http-header).
|
||||
|
||||
## Host HTTP header
|
||||
|
||||
By default `vmauth` sets the `Host` HTTP header to the backend hostname when proxying requests to the corresponding backend.
|
||||
By default, `vmauth` sets the `Host` HTTP header to the backend hostname when proxying requests to the corresponding backend.
|
||||
Sometimes it is needed to keep the original `Host` header from the client request sent to `vmauth`. For example, if backends use host-based routing.
|
||||
In this case set `keep_original_host: true`. For example, the following config instructs to use the original `Host` header from client requests
|
||||
when proxying requests to the `backend:1234`:
|
||||
In this case set `keep_original_host: true`. For example, the following config instructs to use the original `Host` header from client requests when proxying requests to the `backend:1234`:
|
||||
|
||||
```yaml
|
||||
unauthorized_user:
|
||||
@@ -721,23 +688,23 @@ unauthorized_user:
|
||||
|
||||
`vmauth` supports dynamic reload of [`-auth.config`](#auth-config) via the following ways:
|
||||
|
||||
- By sending `SIGHUP` signal to `vmauth` process:
|
||||
```
|
||||
* By sending `SIGHUP` signal to `vmauth` process:
|
||||
|
||||
```sh
|
||||
kill -HUP `pidof vmauth`
|
||||
```
|
||||
- By querying `/-/reload` endpoint. It is recommended protecting it with `-reloadAuthKey`. See [security docs](#security) for details.
|
||||
- By passing the interval for config check to `-configCheckInterval` command-line flag.
|
||||
|
||||
* By querying `/-/reload` endpoint. It is recommended to protect it with `-reloadAuthKey`. See [security docs](#security) for details.
|
||||
* By passing the interval for config check to `-configCheckInterval` command-line flag.
|
||||
|
||||
## Concurrency limiting
|
||||
|
||||
`vmauth` may limit the number of concurrent requests according to the following command-line flags:
|
||||
|
||||
- `-maxConcurrentRequests` limits the global number of concurrent requests `vmauth` can serve across all the configured users.
|
||||
- `-maxConcurrentPerUserRequests` limits the number of concurrent requests `vmauth` can serve per each configured user.
|
||||
* `-maxConcurrentRequests` limits the global number of concurrent requests `vmauth` can serve across all the configured users.
|
||||
* `-maxConcurrentPerUserRequests` limits the number of concurrent requests `vmauth` can serve per each configured user.
|
||||
|
||||
It is also possible to set individual limits on the number of concurrent requests per each user
|
||||
with the `max_concurrent_requests` option. For example, the following [`-auth.config`](#auth-config)
|
||||
limits the number of concurrent requests from the user `foo` to 10:
|
||||
It is also possible to set individual limits on the number of concurrent requests per each user with the `max_concurrent_requests` option. For example, the following [`-auth.config`](#auth-config) limits the number of concurrent requests from the user `foo` to 10:
|
||||
|
||||
```yaml
|
||||
users:
|
||||
@@ -751,28 +718,24 @@ users:
|
||||
|
||||
The following [metrics](#monitoring) related to concurrency limits are exposed by `vmauth`:
|
||||
|
||||
- `vmauth_concurrent_requests_capacity` - the global limit on the number of concurrent requests `vmauth` can serve.
|
||||
* `vmauth_concurrent_requests_capacity` - the global limit on the number of concurrent requests `vmauth` can serve.
|
||||
It is set via `-maxConcurrentRequests` command-line flag.
|
||||
- `vmauth_concurrent_requests_current` - the current number of concurrent requests `vmauth` processes.
|
||||
- `vmauth_concurrent_requests_limit_reached_total` - the number of requests rejected with `429 Too Many Requests` error
|
||||
* `vmauth_concurrent_requests_current` - the current number of concurrent requests `vmauth` processes.
|
||||
* `vmauth_concurrent_requests_limit_reached_total` - the number of requests rejected with `429 Too Many Requests` error
|
||||
because the global concurrency limit has been reached.
|
||||
- `vmauth_user_concurrent_requests_capacity{username="..."}` - the limit on the number of concurrent requests for the given `username`.
|
||||
- `vmauth_user_concurrent_requests_current{username="..."}` - the current number of concurrent requests for the given `username`.
|
||||
- `vmauth_user_concurrent_requests_limit_reached_total{username="..."}` - the number of requests rejected with `429 Too Many Requests` error
|
||||
because of the concurrency limit has been reached for the given `username`.
|
||||
- `vmauth_unauthorized_user_concurrent_requests_capacity` - the limit on the number of concurrent requests for unauthorized users (if `unauthorized_user` section is used).
|
||||
- `vmauth_unauthorized_user_concurrent_requests_current` - the current number of concurrent requests for unauthorized users (if `unauthorized_user` section is used).
|
||||
- `vmauth_unauthorized_user_concurrent_requests_limit_reached_total` - the number of requests rejected with `429 Too Many Requests` error
|
||||
because of the concurrency limit has been reached for unauthorized users (if `unauthorized_user` section is used).
|
||||
* `vmauth_user_concurrent_requests_capacity{username="..."}` - the limit on the number of concurrent requests for the given `username`.
|
||||
* `vmauth_user_concurrent_requests_current{username="..."}` - the current number of concurrent requests for the given `username`.
|
||||
* `vmauth_user_concurrent_requests_limit_reached_total{username="..."}` - the number of requests rejected with `429 Too Many Requests` error because the concurrency limit has been reached for the given `username`.
|
||||
* `vmauth_unauthorized_user_concurrent_requests_capacity` - the limit on the number of concurrent requests for unauthorized users (if `unauthorized_user` section is used).
|
||||
* `vmauth_unauthorized_user_concurrent_requests_current` - the current number of concurrent requests for unauthorized users (if `unauthorized_user` section is used).
|
||||
* `vmauth_unauthorized_user_concurrent_requests_limit_reached_total` - the number of requests rejected with `429 Too Many Requests` error because the concurrency limit has been reached for unauthorized users (if `unauthorized_user` section is used).
|
||||
|
||||
## Backend TLS setup
|
||||
|
||||
By default `vmauth` uses system settings when performing requests to HTTPS backends specified via `url_prefix` option
|
||||
in the [`-auth.config`](#auth-config). These settings can be overridden with the following command-line flags:
|
||||
By default, `vmauth` uses system settings when performing requests to HTTPS backends specified via `url_prefix` option in the [`-auth.config`](#auth-config). These settings can be overridden with the following command-line flags:
|
||||
|
||||
- `-backend.tlsInsecureSkipVerify` allows skipping TLS verification when connecting to HTTPS backends.
|
||||
This global setting can be overridden at per-user level inside [`-auth.config`](#auth-config)
|
||||
via `tls_insecure_skip_verify` option. For example:
|
||||
* `-backend.tlsInsecureSkipVerify` allows skipping TLS verification when connecting to HTTPS backends.
|
||||
This global setting can be overridden at per-user level inside [`-auth.config`](#auth-config) via `tls_insecure_skip_verify` option. For example:
|
||||
|
||||
```yaml
|
||||
- username: "foo"
|
||||
@@ -780,9 +743,9 @@ in the [`-auth.config`](#auth-config). These settings can be overridden with the
|
||||
tls_insecure_skip_verify: true
|
||||
```
|
||||
|
||||
- `-backend.tlsCAFile` allows specifying the path to TLS Root CA for verifying backend TLS certificates.
|
||||
This global setting can be overridden at per-user level inside [`-auth.config`](#auth-config)
|
||||
via `tls_ca_file` option. For example:
|
||||
* `-backend.tlsCAFile` allows specifying the path to TLS Root CA for verifying backend TLS certificates.
|
||||
This global setting can be overridden at per-user level inside [`-auth.config`](#auth-config) via `tls_ca_file` option.
|
||||
For example:
|
||||
|
||||
```yaml
|
||||
- username: "foo"
|
||||
@@ -790,10 +753,9 @@ in the [`-auth.config`](#auth-config). These settings can be overridden with the
|
||||
tls_ca_file: "/path/to/tls/root/ca"
|
||||
```
|
||||
|
||||
- `-backend.tlsCertFile` and `-backend.tlsKeyFile` allows specifying client TLS certificate for passing in requests to HTTPS backends,
|
||||
* `-backend.tlsCertFile` and `-backend.tlsKeyFile` allow specifying client TLS certificate for passing in requests to HTTPS backends,
|
||||
so this certificate can be verified at the backend side (aka [mTLS](https://en.wikipedia.org/wiki/Mutual_authentication)).
|
||||
This global setting can be overridden at per-user level inside [`-auth.config`](#auth-config)
|
||||
via `tls_cert_file` and `tls_key_file` options. For example:
|
||||
This global setting can be overridden at per-user level inside [`-auth.config`](#auth-config) via `tls_cert_file` and `tls_key_file` options. For example:
|
||||
|
||||
```yaml
|
||||
- username: "foo"
|
||||
@@ -802,9 +764,8 @@ in the [`-auth.config`](#auth-config). These settings can be overridden with the
|
||||
tls_key_file: "/path/to/tls/key"
|
||||
```
|
||||
|
||||
- `-backend.tlsServerName` allows specifying optional [TLS ServerName](https://en.wikipedia.org/wiki/Server_Name_Indication) for passing in requests to HTTPS backends.
|
||||
This global setting can be overridden at per-user level inside [`-auth.config`](#auth-config)
|
||||
via `tls_server_name` option. For example:
|
||||
* `-backend.tlsServerName` allows specifying optional [TLS ServerName](https://en.wikipedia.org/wiki/Server_Name_Indication) for passing in requests to HTTPS backends.
|
||||
This global setting can be overridden at per-user level inside [`-auth.config`](#auth-config) via `tls_server_name` option. For example:
|
||||
|
||||
```yaml
|
||||
- username: "foo"
|
||||
@@ -815,7 +776,6 @@ in the [`-auth.config`](#auth-config). These settings can be overridden with the
|
||||
The `-backend.tlsCAFile`, `-backend.tlsCertFile`, `-backend.tlsKeyFile`, `tls_ca_file`, `tls_cert_file` and `tls_key_file` may point either to local file or to `http` / `https` url.
|
||||
The file is checked for modifications every second and is automatically re-read when it is updated.
|
||||
|
||||
|
||||
## IP filters
|
||||
|
||||
[Enterprise version](https://docs.victoriametrics.com/victoriametrics/enterprise/) of `vmauth` can be configured to allow / deny incoming requests via global and per-user IP filters.
|
||||
@@ -850,15 +810,17 @@ By default, the client's TCP address is utilized for IP filtering. In scenarios
|
||||
* `-httpListenAddr.useProxyProtocol=true`
|
||||
|
||||
### Security Considerations
|
||||
|
||||
**HTTP headers are inherently untrustworthy.** It is strongly recommended to implement additional security measures, such as:
|
||||
|
||||
* Dropping `X-Forwarded-For` headers at the internet-facing reverse proxy (e.g., before traffic reaches `vmauth`).
|
||||
* Dropping `X-Forwarded-For` headers at the internet-facing reverse proxy (e.g., before traffic reaches `vmauth`).
|
||||
* Do not use `-httpRealIPHeader` at internet-facing `vmauth`.
|
||||
* Add `removeXFFHTTPHeaderValue` for the internet-facing `vmauth`. It instructs `vmauth` to replace value of `X-Forwarded-For` HTTP header with `remoteAddr` of the client.
|
||||
* Add `removeXFFHTTPHeaderValue` for the internet-facing `vmauth`. It instructs `vmauth` to replace value of `X-Forwarded-For` HTTP header with `remoteAddr` of the client.
|
||||
|
||||
See additional recommendations at [link](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For#security_and_privacy_concerns)
|
||||
See additional recommendations for [security and privacy concerns](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For#security_and_privacy_concerns).
|
||||
|
||||
### Per-User Configuration
|
||||
|
||||
The values of `httpRealIPHeader` {{% available_from "v1.107.0" %}} can be changed on a per-user basis within the user-specific configuration.
|
||||
|
||||
```yaml
|
||||
@@ -877,7 +839,7 @@ users:
|
||||
real_ip_header: CF-Connecting-IP
|
||||
```
|
||||
|
||||
See config example of using IP filters [here](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmauth/example_config_ent.yml).
|
||||
See config example of using [IP filters](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmauth/example_config_ent.yml).
|
||||
|
||||
## Reading auth tokens from other HTTP headers
|
||||
|
||||
@@ -885,14 +847,14 @@ See config example of using IP filters [here](https://github.com/VictoriaMetrics
|
||||
It is possible to read these auth tokens from any other request header by specifying it via `-httpAuthHeader` command-line flag.
|
||||
For example, the following command instructs `vmauth` to read auth token from `X-Amz-Firehose-Access-Key` header:
|
||||
|
||||
```
|
||||
```sh
|
||||
./vmauth -httpAuthHeader='X-Amz-Firehose-Access-Key'
|
||||
```
|
||||
|
||||
It is possible to read auth tokens from multiple headers. For example, the following command instructs `vmauth` to read auth token
|
||||
from both `Authorization` and `X-Amz-Firehose-Access-Key` headers:
|
||||
|
||||
```
|
||||
```sh
|
||||
./vmauth -httpAuthHeader='Authorization' -httpAuthHeader='X-Amz-Firehose-Access-Key'
|
||||
```
|
||||
|
||||
@@ -900,21 +862,16 @@ See also [authorization docs](#authorization) and [security docs](#security).
|
||||
|
||||
## Query args handling
|
||||
|
||||
By default `vmauth` sends all the query args specified in the `url_prefix` to the backend. It also proxies query args from client requests
|
||||
if they do not clash with the args specified in the `url_prefix`. This is needed for security, e.g. it disallows the client overriding
|
||||
security-sensitive query args specified at the `url_prefix` such as `tenant_id`, `password`, `auth_key`, `extra_filters`, etc.
|
||||
By default, `vmauth` sends all the query args specified in the `url_prefix` to the backend. It also proxies query args from client requests if they do not clash with the args specified in the `url_prefix`. This is needed for security, e.g. it disallows the client overriding security-sensitive query args specified at the `url_prefix` such as `tenant_id`, `password`, `auth_key`, `extra_filters`, etc.
|
||||
|
||||
`vmauth` provides the ability to specify a list of query args, which can be proxied from the client request to the backend
|
||||
if they clash with the args specified in the `url_prefix`. In this case the client query args are added to the args from the `url_prefix`
|
||||
before being proxied to the backend. This can be done via the following options:
|
||||
`vmauth` provides the ability to specify a list of query args, which can be proxied from the client request to the backend if they clash with the args specified in the `url_prefix`. In this case the client query args are added to the args from the `url_prefix` before being proxied to the backend. This can be done via the following options:
|
||||
|
||||
- Via `-mergeQueryArgs` command-line flag. This flag may contain comma-separated list of client query arg names, which are allowed
|
||||
* Via `-mergeQueryArgs` command-line flag. This flag may contain comma-separated list of client query arg names, which are allowed
|
||||
to merge with the `url_prefix` query args when sending the request to the backend. This option is applied globally to all the configured backends.
|
||||
|
||||
- Via `merge_query_args` option at the `user` and `url_map` level. These values override the `-mergeQueryArgs` command-line flag.
|
||||
* Via `merge_query_args` option at the `user` and `url_map` level. These values override the `-mergeQueryArgs` command-line flag.
|
||||
|
||||
The example below sends the request to `http://victoria-logs:9429/select/logsql/query?extra_filters={env="prod"}&extra_filters={team="dev"}&query=error`
|
||||
when `vmauth` receives request to `http://vmauth/select/logsql/query?extra_filters={team="dev"}&query=error`:
|
||||
The example below sends the request to `http://victoria-logs:9429/select/logsql/query?extra_filters={env="prod"}&extra_filters={team="dev"}&query=error` when `vmauth` receives request to `http://vmauth/select/logsql/query?extra_filters={team="dev"}&query=error`:
|
||||
|
||||
```yaml
|
||||
unauthorized_user:
|
||||
@@ -1078,15 +1035,14 @@ This may be useful for passing secrets to the config.
|
||||
|
||||
## mTLS protection
|
||||
|
||||
By default `vmauth` accepts http requests at `8427` port (this port can be changed via `-httpListenAddr` command-line flags).
|
||||
[Enterprise version of vmauth](https://docs.victoriametrics.com/victoriametrics/enterprise/) supports the ability to accept [mTLS](https://en.wikipedia.org/wiki/Mutual_authentication)
|
||||
requests at this port, by specifying `-tls` and `-mtls` command-line flags. For example, the following command runs `vmauth`, which accepts only mTLS requests at port `8427`:
|
||||
By default, `vmauth` accepts HTTP requests at `8427` port (this port can be changed via `-httpListenAddr` command-line flag).
|
||||
[Enterprise version of vmauth](https://docs.victoriametrics.com/victoriametrics/enterprise/) supports the ability to accept [mTLS](https://en.wikipedia.org/wiki/Mutual_authentication) requests at this port, by specifying `-tls` and `-mtls` command-line flags. For example, the following command runs `vmauth`, which accepts only mTLS requests at port `8427`:
|
||||
|
||||
```
|
||||
```sh
|
||||
./vmauth -tls -mtls -auth.config=...
|
||||
```
|
||||
|
||||
By default system-wide [TLS Root CA](https://en.wikipedia.org/wiki/Root_certificate) is used for verifying client certificates if `-mtls` command-line flag is specified.
|
||||
By default, system-wide [TLS Root CA](https://en.wikipedia.org/wiki/Root_certificate) is used to verify client certificates, if `-mtls` command-line flag is specified.
|
||||
It is possible to specify custom TLS Root CA via `-mtlsCAFile` command-line flag.
|
||||
|
||||
See also [automatic issuing of TLS certificates](#automatic-issuing-of-tls-certificates) and [mTLS-based request routing](#mtls-based-request-routing).
|
||||
@@ -1112,7 +1068,8 @@ See [these docs](#mtls-protection) on how to enable [mTLS](https://en.wikipedia.
|
||||
|
||||
Alternatively, [TLS termination proxy](https://en.wikipedia.org/wiki/TLS_termination_proxy) may be put in front of `vmauth`.
|
||||
|
||||
It is recommended protecting the following endpoints with authKeys:
|
||||
It is recommended to protect the following endpoints with authKeys:
|
||||
|
||||
* `/-/reload` with `-reloadAuthKey` command-line flag, so external users couldn't trigger config reload.
|
||||
* `/flags` with `-flagsAuthKey` command-line flag, so unauthorized users couldn't get command-line flag values.
|
||||
* `/metrics` with `-metricsAuthKey` command-line flag, so unauthorized users couldn't access [vmauth metrics](#monitoring).
|
||||
@@ -1127,12 +1084,11 @@ As an alternative, it's possible to serve internal API routes at the different l
|
||||
`vmauth` [Enterprise](https://docs.victoriametrics.com/victoriametrics/enterprise/) supports automatic issuing of TLS certificates via [Let's Encrypt service](https://letsencrypt.org/).
|
||||
The following command-line flags must be set in order to enable automatic issuing of TLS certificates:
|
||||
|
||||
- `-httpListenAddr` must be set for listening TCP port `443`. For example, `-httpListenAddr=:443`. This port must be accessible by the [Let's Encrypt service](https://letsencrypt.org/).
|
||||
- `-tls` must be set in order to accept HTTPS requests at `-httpListenAddr`. Note that `-tlcCertFile` and `-tlsKeyFile` aren't needed when automatic TLS certificate issuing is enabled.
|
||||
- `-tlsAutocertHosts` must be set to comma-separated list of hosts, which can be reached via `-httpListenAddr`. TLS certificates are automatically issued for these hosts.
|
||||
- `-tlsAutocertEmail` must be set to contact email for the issued TLS certificates.
|
||||
- `-tlsAutocertCacheDir` may be set to the directory path for persisting the issued TLS certificates between `vmauth` restarts. If this flag isn't set,
|
||||
then TLS certificates are re-issued on every restart.
|
||||
* `-httpListenAddr` must be set to listen on TCP port `443`. For example, `-httpListenAddr=:443`. This port must be accessible by the [Let's Encrypt service](https://letsencrypt.org/).
|
||||
* `-tls` must be set in order to accept HTTPS requests at `-httpListenAddr`. Note that `-tlsCertFile` and `-tlsKeyFile` aren't needed when automatic TLS certificate issuing is enabled.
|
||||
* `-tlsAutocertHosts` must be set to comma-separated list of hosts, which can be reached via `-httpListenAddr`. TLS certificates are automatically issued for these hosts.
|
||||
* `-tlsAutocertEmail` must be set to contact email for the issued TLS certificates.
|
||||
* `-tlsAutocertCacheDir` may be set to the directory path to persist the issued TLS certificates between `vmauth` restarts. If this flag isn't set, then TLS certificates are re-issued on every restart.
|
||||
|
||||
This functionality can be evaluated for free according to [these docs](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
||||
|
||||
@@ -1140,10 +1096,8 @@ See also [security recommendations](#security).
|
||||
|
||||
## Monitoring
|
||||
|
||||
`vmauth` exports various metrics in Prometheus exposition format at `http://vmauth-host:8427/metrics` page. It is recommended setting up regular scraping of this page
|
||||
either via [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) or via Prometheus-compatible scraper, so the exported metrics could be analyzed later.
|
||||
Use the official [Grafana dashboard](https://grafana.com/grafana/dashboards/21394) and [alerting rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules/alerts-vmauth.yml)
|
||||
for `vmauth` monitoring.
|
||||
`vmauth` exports various metrics in Prometheus exposition format at `http://vmauth-host:8427/metrics` page. It is recommended to set up regular scraping of this page either via [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) or via Prometheus-compatible scraper, so the exported metrics could be analyzed later.
|
||||
Use the official [Grafana dashboard](https://grafana.com/grafana/dashboards/21394) and [alerting rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules/alerts-vmauth.yml) for `vmauth` monitoring.
|
||||
|
||||
If you use Google Cloud Managed Prometheus for scraping metrics from VictoriaMetrics components, then pass `-metrics.exposeMetadata`
|
||||
command-line flag to them, so they add `TYPE` and `HELP` comments per each exposed metric at `/metrics` page.
|
||||
@@ -1172,8 +1126,7 @@ users:
|
||||
# other config options here
|
||||
```
|
||||
|
||||
Additional labels for per-user metrics can be specified via `metric_labels` section. For example, the following config
|
||||
defines `{dc="eu",team="dev"}` labels additionally to `username="foobar"` label:
|
||||
Additional labels for per-user metrics can be specified via `metric_labels` section. For example, the following config defines `{dc="eu",team="dev"}` labels additionally to `username="foobar"` label:
|
||||
|
||||
```yaml
|
||||
users:
|
||||
@@ -1189,15 +1142,13 @@ users:
|
||||
* `vmauth_unauthorized_user_requests_total` [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) - the number of unauthorized requests served
|
||||
* `vmauth_unauthorized_user_request_backend_errors_total` [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) - the number of unauthorized request errors
|
||||
* `vmauth_unauthorized_user_request_duration_seconds` [summary](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#summary) - the duration of unauthorized requests
|
||||
* `vmauth_unauthorized_user_concurrent_requests_limit_reached_total` [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) - the number of failed unauthorized requests
|
||||
because of exceeded [concurrency limits](#concurrency-limiting)
|
||||
* `vmauth_unauthorized_user_concurrent_requests_capacity` [gauge](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#gauge) - the maximum number
|
||||
of [concurrent unauthorized requests](#concurrency-limiting)
|
||||
* `vmauth_unauthorized_user_concurrent_requests_limit_reached_total` [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) - the number of failed unauthorized requests because of exceeded [concurrency limits](#concurrency-limiting)
|
||||
* `vmauth_unauthorized_user_concurrent_requests_capacity` [gauge](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#gauge) - the maximum number of [concurrent unauthorized requests](#concurrency-limiting)
|
||||
* `vmauth_unauthorized_user_concurrent_requests_current` [gauge](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#gauge) - the current number of [concurrent unauthorized requests](#concurrency-limiting)
|
||||
|
||||
## How to build from sources
|
||||
|
||||
It is recommended using [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) - `vmauth` is located in `vmutils-*` archives there.
|
||||
It is recommended to use [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) - `vmauth` is located in `vmutils-*` archives there.
|
||||
|
||||
### Development build
|
||||
|
||||
@@ -1217,8 +1168,7 @@ Run `make package-vmauth`. It builds `victoriametrics/vmauth:<PKG_TAG>` docker i
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmauth`.
|
||||
|
||||
The base docker image is [alpine](https://hub.docker.com/_/alpine) but it is possible to use any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
|
||||
The base docker image is [alpine](https://hub.docker.com/_/alpine) but it is possible to use any other base image by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
|
||||
|
||||
```sh
|
||||
ROOT_IMAGE=scratch make package-vmauth
|
||||
@@ -1230,24 +1180,20 @@ ROOT_IMAGE=scratch make package-vmauth
|
||||
|
||||
* Memory profile. It can be collected with the following command (replace `0.0.0.0` with hostname if needed):
|
||||
|
||||
|
||||
```sh
|
||||
curl http://0.0.0.0:8427/debug/pprof/heap > mem.pprof
|
||||
```
|
||||
|
||||
|
||||
* CPU profile. It can be collected with the following command (replace `0.0.0.0` with hostname if needed):
|
||||
|
||||
|
||||
```sh
|
||||
curl http://0.0.0.0:8427/debug/pprof/profile > cpu.pprof
|
||||
```
|
||||
|
||||
|
||||
The command for collecting CPU profile waits for 30 seconds before returning.
|
||||
|
||||
The collected profiles may be analyzed with [go tool pprof](https://github.com/google/pprof).
|
||||
It is safe sharing the collected profiles from security point of view, since they do not contain sensitive information.
|
||||
It is safe to share the collected profiles from a security point of view, since they do not contain sensitive information.
|
||||
|
||||
## Advanced usage
|
||||
|
||||
|
||||
@@ -12,21 +12,47 @@ aliases:
|
||||
- /vmbackup/index.html
|
||||
- /vmbackup/
|
||||
---
|
||||
`vmbackup` creates VictoriaMetrics data backups from instant snapshots.
|
||||
More information how to work with them could be found in [instant snapshots](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-work-with-snapshots) documentation.
|
||||
|
||||
`vmbackup` supports incremental and full backups. Incremental backups are created automatically if the destination path already contains data from the previous backup.
|
||||
Full backups can be accelerated with `-origin` pointing to an already existing backup on the same remote storage. In this case `vmbackup` makes server-side copy for the shared
|
||||
data between the existing backup and new backup. It saves time and costs on data transfer.
|
||||
|
||||
`vmbackup` creates backups of VictoriaMetrics data to protect against hardware failures and accidental data loss.
|
||||
Whether you are using a single-node or a cluster version, it is recommended to use `vmbackup` to perform periodic data backups from instant snapshots.
|
||||
More information on how to work with them could be found in [instant snapshots documentation](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-work-with-snapshots).
|
||||
Backup process can be interrupted at any time. It is automatically resumed from the interruption point when restarting `vmbackup` with the same args.
|
||||
|
||||
Backed up data can be restored with [vmrestore](https://docs.victoriametrics.com/victoriametrics/vmrestore/).
|
||||
|
||||
See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
|
||||
|
||||
See also [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/) tool built on top of `vmbackup`. This tool simplifies
|
||||
creation of hourly, daily, weekly and monthly backups.
|
||||
If you are running enterprise version, you can also use [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/) tool built on top of `vmbackup`.
|
||||
This tool simplifies the creation of hourly, daily, weekly and monthly backups.
|
||||
Enterprise binaries can be downloaded and evaluated for free from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
|
||||
See how to request a [free trial license](https://victoriametrics.com/products/enterprise/trial/).
|
||||
|
||||
### Single node backup
|
||||
To make a complete backup for VictoriaMetrics single node run the following command:
|
||||
```sh
|
||||
./vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=gs://<bucket>/<path/to/new/backup>
|
||||
```
|
||||
|
||||
Please refer to [full backup creation](https://docs.victoriametrics.com/victoriametrics/vmbackup/#full-backups) for further details.
|
||||
We recommend using [smart backup](https://docs.victoriametrics.com/victoriametrics/vmbackup/#smart-backups) strategy as a best practice.
|
||||
|
||||
### Cluster backup {id="backups-for-victoriametrics-cluster"}
|
||||
|
||||
To make a complete backup for [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/), `vmbackup` must be run on each `vmstorage` node in cluster. Backups must
|
||||
be placed into different directories on the remote storage in order to avoid conflicts between backups from different nodes.
|
||||
|
||||
For example, run the following command to make a backup for 3 `vmstorage` nodes:
|
||||
|
||||
```sh
|
||||
vmstorage-1$ /vmbackup -storageDataPath=</path/to/vmstorage-data> -snapshot.createURL=http://vmstorage1:8482/snapshot/create -dst=gs://<bucket>/vmstorage-1
|
||||
vmstorage-2$ /vmbackup -storageDataPath=</path/to/vmstorage-data> -snapshot.createURL=http://vmstorage2:8482/snapshot/create -dst=gs://<bucket>/vmstorage-2
|
||||
vmstorage-3$ /vmbackup -storageDataPath=</path/to/vmstorage-data> -snapshot.createURL=http://vmstorage3:8482/snapshot/create -dst=gs://<bucket>/vmstorage-3
|
||||
```
|
||||
|
||||
Note that `vmbackup` needs access to data folder on every `vmstorage` node.
|
||||
For Kubernetes deployments it is recommended to use [sidecar containers](https://kubernetes.io/docs/concepts/workloads/pods/sidecar-containers/) for running `vmbackup` on the same pod with `vmstorage`.
|
||||
|
||||
|
||||
Further in this document you can find [how backup mechanism works step by step](https://docs.victoriametrics.com/victoriametrics/vmbackup/#how-does-it-work), [troubleshooting](https://docs.victoriametrics.com/victoriametrics/vmbackup/#troubleshooting) and [advanced configuration](https://docs.victoriametrics.com/victoriametrics/vmbackup/#advanced-usage) sections.
|
||||
|
||||
## Supported storage types
|
||||
|
||||
@@ -38,23 +64,33 @@ creation of hourly, daily, weekly and monthly backups.
|
||||
* Any S3-compatible storage such as [MinIO](https://github.com/minio/minio) or [Ceph](https://docs.ceph.com/en/pacific/radosgw/s3/). See [these docs](#advanced-usage) for details.
|
||||
* Local filesystem. Example: `fs://</absolute/path/to/backup>`. Note that `vmbackup` prevents from storing the backup into the directory pointed by `-storageDataPath` command-line flag, since this directory should be managed solely by VictoriaMetrics or `vmstorage`.
|
||||
|
||||
## Use cases
|
||||
## Backup types {id="use-cases"}
|
||||
|
||||
### Regular backups
|
||||
`vmbackup` supports [incremental](https://docs.victoriametrics.com/victoriametrics/vmbackup/#incremental-backups) and [full](https://docs.victoriametrics.com/victoriametrics/vmbackup/#full-backups) backups. Incremental backups are created automatically if the destination path already contains data from the previous backup.
|
||||
Full backups can be accelerated with `-origin` pointing to an already existing backup on the same remote storage. In this case `vmbackup` makes server-side copy for the shared
|
||||
data between the existing backup and new backup. It saves time and costs on data transfer.
|
||||
|
||||
Regular backup can be performed with the following command:
|
||||
All commands below are provided for a single node version.
|
||||
For a cluster version `vmbackup` should be executed on each vmstorage node and `-snapshot.createURL` should point to vmstorage:
|
||||
```sh
|
||||
./vmbackup -storageDataPath=</path/to/vmstorage-data> -snapshot.createURL=http://vmstorage1:8482/snapshot/create -dst=gs://<bucket>/vmstorage-1
|
||||
```
|
||||
|
||||
### Full backups
|
||||
|
||||
Full backup can be performed on a single node with the following command:
|
||||
|
||||
```sh
|
||||
./vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=gs://<bucket>/<path/to/new/backup>
|
||||
```
|
||||
|
||||
* `</path/to/victoria-metrics-data>` - path to VictoriaMetrics data pointed by `-storageDataPath` command-line flag in single-node VictoriaMetrics or in cluster `vmstorage`.
|
||||
There is no need to stop VictoriaMetrics for creating backups since they are performed from immutable [instant snapshots](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-work-with-snapshots).
|
||||
There is no need to stop VictoriaMetrics to create backups since they are performed from immutable [instant snapshots](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-work-with-snapshots).
|
||||
* `http://victoriametrics:8428/snapshot/create` is the url for creating snapshots according to [these docs](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-work-with-snapshots). `vmbackup` creates a snapshot by querying the provided `-snapshot.createURL`, then performs the backup and then automatically removes the created snapshot.
|
||||
* `<bucket>` is an already existing name for [GCS bucket](https://cloud.google.com/storage/docs/creating-buckets).
|
||||
* `<path/to/new/backup>` is the destination path where new backup will be placed.
|
||||
|
||||
### Regular backups with server-side copy from existing backup
|
||||
### Full backups with server-side copy from existing backup {id="regular-backups-with-server-side-copy-from-existing-backup"}
|
||||
|
||||
If the destination GCS bucket already contains the previous backup at `-origin` path, then new backup can be accelerated
|
||||
with the following command:
|
||||
@@ -63,11 +99,10 @@ with the following command:
|
||||
./vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=gs://<bucket>/<path/to/new/backup> -origin=gs://<bucket>/<path/to/existing/backup>
|
||||
```
|
||||
|
||||
It saves time and network bandwidth costs by performing server-side copy for the shared data from the `-origin` to `-dst`.
|
||||
Typical object storage just creates new names for already existing objects when performing server-side copy,
|
||||
so this operation should be fast and inexpensive. Unfortunately, there are object storage systems such as [S3 Glacier](https://aws.amazon.com/s3/storage-classes/glacier/),
|
||||
which make full copies for the copied objects during server-side copy. This may significantly slow down server-side copy
|
||||
and make it very expensive.
|
||||
It saves time and network bandwidth costs by performing server-side copy for the shared data from the `-origin` to `-dst` since backup data isn't transferred
|
||||
between the remote storage and locally running `vmbackup` tool.
|
||||
|
||||
Please see notes in [server-side copy of the existing backup](https://docs.victoriametrics.com/victoriametrics/vmbackup/#server-side-copy-of-the-existing-backup) for specifics depending on cloud storage provider you are using.
|
||||
|
||||
### Incremental backups
|
||||
|
||||
@@ -79,8 +114,8 @@ It saves time and network bandwidth costs when working with big backups:
|
||||
```
|
||||
|
||||
### Smart backups
|
||||
|
||||
Smart backups mean storing full daily backups into `YYYYMMDD` folders and creating incremental hourly backup into `latest` folder:
|
||||
Smart backup is a backup strategy that combines full and incremental backups with cleanup operations to efficiently manage data backups based on defined settings and available storage space.
|
||||
In case of VictoriaMetrics smart backups mean storing full daily backups into `YYYYMMDD` folders and creating incremental hourly backup into `latest` folder:
|
||||
|
||||
* Run the following command every hour:
|
||||
|
||||
@@ -88,9 +123,7 @@ Smart backups mean storing full daily backups into `YYYYMMDD` folders and creati
|
||||
./vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=gs://<bucket>/latest
|
||||
```
|
||||
|
||||
This command creates an [instant snapshot](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-work-with-snapshots)
|
||||
and uploads it to `gs://<bucket>/latest`. It uploads only the changed data (aka incremental backup). This saves network bandwidth costs and time
|
||||
when backing up large amounts of data.
|
||||
This command creates an [instant snapshot](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-work-with-snapshots) and uploads it to `gs://<bucket>/latest`. It uploads only the changed data (aka incremental backup). This saves network bandwidth costs and time when backing up large amounts of data.
|
||||
|
||||
* Run the following command once a day:
|
||||
|
||||
@@ -99,13 +132,9 @@ when backing up large amounts of data.
|
||||
```
|
||||
|
||||
This command makes [server-side copy](#server-side-copy-of-the-existing-backup) of the backup from `gs://<bucket>/latest` to `gs://<bucket>/<YYYYMMDD>`,
|
||||
were `<YYYYMMDD>` is the current date like `20240125`. Server-side copy of the backup should be fast on most object storage systems,
|
||||
since it just creates new names for already existing objects. The server-side copy can be slow on some object storage systems
|
||||
such as [S3 Glacier](https://aws.amazon.com/s3/storage-classes/glacier/), since they may perform full object copy instead of creating
|
||||
new names for already existing objects. This may be slow and expensive.
|
||||
where `<YYYYMMDD>` is the current date like `20240125`.
|
||||
|
||||
The `smart backups` approach described above saves network bandwidth costs on hourly backups (since they are incremental)
|
||||
and allows recovering data from either the last hour (the `latest` backup) or from any day (`YYYYMMDD` backups).
|
||||
The `smart backups` approach described above saves network bandwidth costs on hourly backups (since they are incremental) and allows recovering data from either the last hour (the `latest` backup) or from any day (`YYYYMMDD` backups).
|
||||
|
||||
Note that hourly backup shouldn't run when creating daily backup.
|
||||
|
||||
@@ -115,9 +144,7 @@ See also [vmbackupmanager tool](https://docs.victoriametrics.com/victoriametrics
|
||||
|
||||
### Server-side copy of the existing backup
|
||||
|
||||
Sometimes it is needed to make server-side copy of the existing backup. This can be done by specifying the source backup path via `-origin` command-line flag,
|
||||
while the destination path for backup copy must be specified via `-dst` command-line flag. For example, the following command copies backup
|
||||
from `gs://bucket/foo` to `gs://bucket/bar`:
|
||||
Sometimes it is necessary to make server-side copy of the existing backup. This can be done by specifying the source backup path via `-origin` command-line flag, while the destination path for backup copy must be specified via `-dst` command-line flag. For example, the following command copies backup from `gs://bucket/foo` to `gs://bucket/bar`:
|
||||
|
||||
```sh
|
||||
./vmbackup -origin=gs://bucket/foo -dst=gs://bucket/bar
|
||||
@@ -126,29 +153,12 @@ from `gs://bucket/foo` to `gs://bucket/bar`:
|
||||
The `-origin` and `-dst` must point to the same object storage bucket or to the same filesystem.
|
||||
|
||||
The server-side backup copy is usually performed at much faster speed compared to the usual backup, since backup data isn't transferred
|
||||
between the remote storage and locally running `vmbackup` tool. Object storage systems usually just make new names for already existing
|
||||
objects during server-side copy. Unfortunately there are systems such as [S3 Glacier](https://aws.amazon.com/s3/storage-classes/glacier/),
|
||||
which perform full object copy during server-side copying. This may be slow and expensive.
|
||||
between the remote storage and locally running `vmbackup` tool. Some object storage systems might just make new names for already existing
|
||||
objects during server-side copy. However, some systems such as [S3 Glacier](https://aws.amazon.com/s3/storage-classes/glacier/),
|
||||
perform a full object copy during server-side copying, which can be slow and expensive. Please verify your storage system provider's documentation for specific behavior.
|
||||
|
||||
If the `-dst` already contains some data, then its contents are synced with the `-origin` data. This allows making incremental server-side copies of backups.
|
||||
|
||||
### Backups for VictoriaMetrics cluster
|
||||
|
||||
`vmbackup` can be used for creating backups for [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/).
|
||||
In order to perform a complete backup for the cluster, `vmbackup` must be run on each `vmstorage` node in cluster. Backups must
|
||||
be placed into different directories on the remote storage in order to avoid conflicts between backups from different nodes.
|
||||
|
||||
For example, when creating a backup with 3 `vmstorage` nodes, the following commands must be run:
|
||||
|
||||
```sh
|
||||
vmstorage-1$ /vmbackup -storageDataPath=</path/to/vmstorage-data> -snapshot.createURL=http://vmstorage1:8482/snapshot/create -dst=gs://<bucket>/vmstorage-1
|
||||
vmstorage-2$ /vmbackup -storageDataPath=</path/to/vmstorage-data> -snapshot.createURL=http://vmstorage2:8482/snapshot/create -dst=gs://<bucket>/vmstorage-2
|
||||
vmstorage-3$ /vmbackup -storageDataPath=</path/to/vmstorage-data> -snapshot.createURL=http://vmstorage3:8482/snapshot/create -dst=gs://<bucket>/vmstorage-3
|
||||
````
|
||||
|
||||
Note that `vmbackup` needs access to data folder of every `vmstorage` node. It is recommended to run `vmbackup` on the same machine where `vmstorage` is running.
|
||||
For Kubernetes deployments it is recommended to use [sidecar containers](https://kubernetes.io/docs/concepts/workloads/pods/sidecar-containers/) for running `vmbackup` on the same pod with `vmstorage`.
|
||||
|
||||
## How does it work?
|
||||
|
||||
The backup algorithm is the following:
|
||||
@@ -163,7 +173,7 @@ The backup algorithm is the following:
|
||||
1. Delete the created snapshot.
|
||||
|
||||
The algorithm splits source files into 1 GiB chunks in the backup. Each chunk is stored as a separate file in the backup.
|
||||
Such splitting balances between the number of files in the backup and the amounts of data that needs to be re-transferred after temporary errors.
|
||||
Such splitting balances between the number of files in the backup and the amount of data that needs to be re-transferred after temporary errors.
|
||||
|
||||
`vmbackup` relies on [instant snapshot](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) properties:
|
||||
|
||||
@@ -178,33 +188,32 @@ See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
* If the backup is slow, then try setting higher value for `-concurrency` flag. This will increase the number of concurrent workers that upload data to backup storage.
|
||||
* If `vmbackup` eats all the network bandwidth or CPU, then either decrease the `-concurrency` command-line flag value or set `-maxBytesPerSecond` command-line flag value to lower value.
|
||||
* If the backup is slow, try setting a higher value for the `-concurrency` flag. This will increase the number of concurrent workers that upload data to backup storage.
|
||||
* If `vmbackup` consumes all the network bandwidth or CPU, then either decrease the `-concurrency` command-line flag value or set `-maxBytesPerSecond` command-line flag value to a lower value.
|
||||
* If `vmbackup` consumes all the CPU on systems with a large number of CPU cores, then try running it with `-filestream.disableFadvise` command-line flag.
|
||||
* If `vmbackup` has been interrupted due to temporary error, then just restart it with the same args. It will resume the backup process. After backup process has finished successfully, please remove old snapshot that was created during failed attempt.
|
||||
* If `vmbackup` has been interrupted due to temporary error, then just restart it with the same args. It will resume the backup process. After backup process has finished successfully, please [remove old snapshot](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#delete-snapshot) that was created during failed attempt.
|
||||
* Backups created from [single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) cannot be restored
|
||||
at [cluster VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) and vice versa.
|
||||
* See [snapshot troubleshooting](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#snapshot-troubleshooting) for a description of how snapshots use disk space and for related recommendations.
|
||||
|
||||
## Advanced usage
|
||||
|
||||
|
||||
### Providing credentials as a file
|
||||
|
||||
Obtaining credentials from a file.
|
||||
|
||||
Add flag `-credsFilePath=/etc/credentials` with the following content:
|
||||
|
||||
- for S3 (AWS, MinIO or other S3 compatible storages):
|
||||
|
||||
* for S3 (AWS, MinIO or other S3 compatible storages):
|
||||
|
||||
```sh
|
||||
[default]
|
||||
aws_access_key_id=theaccesskey
|
||||
aws_secret_access_key=thesecretaccesskeyvalue
|
||||
```
|
||||
|
||||
- for GCP cloud storage:
|
||||
|
||||
* for GCP cloud storage:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "service_account",
|
||||
@@ -220,16 +229,17 @@ Add flag `-credsFilePath=/etc/credentials` with the following content:
|
||||
}
|
||||
```
|
||||
|
||||
### Providing credentials via env variables
|
||||
### Providing credentials via env variables
|
||||
|
||||
Obtaining credentials from env variables.
|
||||
- For AWS S3 compatible storages set env variable `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`.
|
||||
|
||||
* For AWS S3 compatible storages set env variable `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`.
|
||||
Also you can set env variable `AWS_SHARED_CREDENTIALS_FILE` with path to credentials file.
|
||||
- For GCE cloud storage set env variable `GOOGLE_APPLICATION_CREDENTIALS` with path to credentials file.
|
||||
- For Azure storage use one of these env variables:
|
||||
- `AZURE_STORAGE_ACCOUNT_CONNECTION_STRING`: use a connection string (must be either SAS Token or Account/Key)
|
||||
- `AZURE_STORAGE_ACCOUNT_NAME` and `AZURE_STORAGE_ACCOUNT_KEY`: use a specific account name and key (either primary or secondary)
|
||||
- `AZURE_USE_DEFAULT_CREDENTIAL` and `AZURE_STORAGE_ACCOUNT_NAME`: use the `DefaultAzureCredential` to allow the Azure library
|
||||
* For GCE cloud storage set env variable `GOOGLE_APPLICATION_CREDENTIALS` with path to credentials file.
|
||||
* For Azure storage use one of these env variables:
|
||||
* `AZURE_STORAGE_ACCOUNT_CONNECTION_STRING`: use a connection string (must be either SAS Token or Account/Key)
|
||||
* `AZURE_STORAGE_ACCOUNT_NAME` and `AZURE_STORAGE_ACCOUNT_KEY`: use a specific account name and key (either primary or secondary)
|
||||
* `AZURE_USE_DEFAULT_CREDENTIAL` and `AZURE_STORAGE_ACCOUNT_NAME`: use the `DefaultAzureCredential` to allow the Azure library
|
||||
to search for multiple options (for example, managed identity related variables). Note that if multiple credentials are available,
|
||||
it is required to specify the `AZURE_CLIENT_ID` to select specific credentials.
|
||||
|
||||
@@ -239,13 +249,13 @@ Please, note that `vmbackup` will use credentials provided by cloud providers me
|
||||
|
||||
### Using cloud providers metadata service
|
||||
|
||||
`vmbackup` and `vmbackupmanager` will automatically use cloud providers metadata service in order to obtain credentials if they are running in cloud environment
|
||||
and credentials are not explicitly provided via flags or env variables.
|
||||
`vmbackup` and `vmbackupmanager` will automatically use cloud providers metadata service in order to obtain credentials if they are running in cloud environment and credentials are not explicitly provided via flags or env variables.
|
||||
|
||||
### Providing credentials in Kubernetes
|
||||
|
||||
The simplest way to provide credentials in Kubernetes is to use [Secrets](https://kubernetes.io/docs/concepts/configuration/secret/)
|
||||
and inject them into the pod as environment variables. For example, the following secret can be used for AWS S3 credentials:
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
@@ -255,7 +265,9 @@ data:
|
||||
access_key: key
|
||||
secret_key: secret
|
||||
```
|
||||
|
||||
And then it can be injected into the pod as environment variables:
|
||||
|
||||
```yaml
|
||||
...
|
||||
env:
|
||||
@@ -272,10 +284,11 @@ env:
|
||||
...
|
||||
```
|
||||
|
||||
A more secure way is to use IAM roles to provide tokens for pods instead of managing credentials manually.
|
||||
A more secure way is to use IAM roles to provide tokens for pods instead of managing credentials manually.
|
||||
|
||||
For AWS deployments it will be required to configure [IAM roles for service accounts](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html).
|
||||
In order to use IAM roles for service accounts with `vmbackup` or `vmbackupmanager` it is required to create ServiceAccount with IAM role mapping:
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
@@ -284,11 +297,13 @@ metadata:
|
||||
annotations:
|
||||
eks.amazonaws.com/role-arn: arn:aws:iam::{ACCOUNT_ID}:role/{ROLE_NAME}
|
||||
```
|
||||
|
||||
And [configure pod to use service account](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/).
|
||||
After this `vmbackup` and `vmbackupmanager` will automatically use IAM role for service account in order to obtain credentials.
|
||||
|
||||
For GCP deployments it will be required to configure [Workload Identity](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity).
|
||||
In order to use Workload Identity with `vmbackup` or `vmbackupmanager` it is required to create ServiceAccount with Workload Identity annotation:
|
||||
|
||||
```yaml
|
||||
---
|
||||
apiVersion: v1
|
||||
@@ -298,6 +313,7 @@ metadata:
|
||||
annotations:
|
||||
iam.gke.io/gcp-service-account: {sa_name}@{project_name}.iam.gserviceaccount.com
|
||||
```
|
||||
|
||||
And [configure pod to use service account](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/).
|
||||
After this `vmbackup` and `vmbackupmanager` will automatically use Workload Identity for service account in order to obtain credentials.
|
||||
|
||||
@@ -306,20 +322,21 @@ After this `vmbackup` and `vmbackupmanager` will automatically use Workload Iden
|
||||
Usage with s3 custom url endpoint. It is possible to use `vmbackup` with s3 compatible storages like minio, cloudian, etc.
|
||||
You have to add a custom url endpoint via flag:
|
||||
|
||||
- for MinIO
|
||||
* for MinIO
|
||||
|
||||
```sh
|
||||
-customS3Endpoint=http://localhost:9000
|
||||
```
|
||||
|
||||
- for aws gov region
|
||||
* for aws gov region
|
||||
|
||||
```sh
|
||||
-customS3Endpoint=https://s3-fips.us-gov-west-1.amazonaws.com
|
||||
```
|
||||
|
||||
### Permanent deletion of objects in S3-compatible storages
|
||||
|
||||
`vmbackup` and [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/) use standard delete operation
|
||||
for S3-compatible object storage when performing [incremental backups](#incremental-backups).
|
||||
`vmbackup` and [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/) use standard delete operation for S3-compatible object storage when performing [incremental backups](#incremental-backups).
|
||||
This operation removes only the current version of the object. This works OK in most cases.
|
||||
|
||||
Sometimes it is needed to remove all the versions of an object. In this case pass `-deleteAllObjectVersions` command-line flag to `vmbackup`.
|
||||
@@ -531,7 +548,7 @@ Run `vmbackup -help` in order to see all the available options:
|
||||
|
||||
## How to build from sources
|
||||
|
||||
It is recommended using [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) - see `vmutils-*` archives there.
|
||||
It is recommended to use the [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) - see `vmutils-*` archives there.
|
||||
|
||||
### Development build
|
||||
|
||||
@@ -551,9 +568,9 @@ Run `make package-vmbackup`. It builds `victoriametrics/vmbackup:<PKG_TAG>` dock
|
||||
`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmbackup`.
|
||||
|
||||
The base docker image is [alpine](https://hub.docker.com/_/alpine) but it is possible to use any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
|
||||
The base docker image is [alpine](https://hub.docker.com/_/alpine) but it is possible to use any other base image by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
|
||||
|
||||
```sh
|
||||
ROOT_IMAGE=scratch make package-vmbackup
|
||||
```
|
||||
|
||||
|
||||
@@ -17,19 +17,19 @@ aliases:
|
||||
|
||||
***vmbackupmanager is a part of [enterprise package](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
||||
It is available for download and evaluation at [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
|
||||
See how to request a free trial license [here](https://victoriametrics.com/products/enterprise/trial/).***
|
||||
See how to request a [free trial license](https://victoriametrics.com/products/enterprise/trial/).***
|
||||
|
||||
The VictoriaMetrics backup manager automates regular backup procedures. It supports the following backup intervals: **hourly**, **daily**, **weekly** and **monthly**.
|
||||
Multiple backup intervals may be configured simultaneously. I.e. the backup manager creates hourly backups every hour, while it creates daily backups every day, etc.
|
||||
Backup manager must have read access to the storage data, so best practice is to install it on the same machine (or as a sidecar) where the storage node is installed.
|
||||
The backup service makes a backup every hour and puts it to the latest folder and then copies data to the folders
|
||||
which represent the backup intervals (hourly, daily, weekly and monthly)
|
||||
The backup service makes a backup every hour, places it into the `latest` folder and then copies data to the folders
|
||||
that represent the backup intervals (hourly, daily, weekly and monthly).
|
||||
|
||||
The required flags for running the service are as follows:
|
||||
|
||||
* `-license` or `-licenseFile` . See [these docs](https://docs.victoriametrics.com/victoriametrics/enterprise/#running-victoriametrics-enterprise).
|
||||
* `-storageDataPath` - path to VictoriaMetrics or vmstorage data path to make backup from.
|
||||
* `-snapshot.createURL` - VictoriaMetrics creates snapshot URL which will automatically be created during backup. Example: http://victoriametrics:8428/snapshot/create
|
||||
* `-snapshot.createURL` - VictoriaMetrics URL for creating snapshots. A snapshot is created automatically via this URL during backup. Example: <http://victoriametrics:8428/snapshot/create>
|
||||
* `-dst` - backup destination at [the supported storage types](https://docs.victoriametrics.com/victoriametrics/vmbackup/#supported-storage-types).
|
||||
* `-credsFilePath` - path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set.
|
||||
See [https://cloud.google.com/iam/docs/creating-managing-service-account-keys](https://cloud.google.com/iam/docs/creating-managing-service-account-keys)
|
||||
@@ -52,16 +52,17 @@ The backup manager creates the following directory hierarchy at `-dst`:
|
||||
* `/weekly/` - contains weekly backups. Each backup is named as `YYYY-WW`
|
||||
* `/monthly/` - contains monthly backups. Each backup is named as `YYYY-MM`
|
||||
|
||||
The `vmbackupmanager` takes backups every hour if hourly backups are not disabled; otherwise,
|
||||
it defaults to taking backups every 24 hours at 00:00 in UTC timezone.
|
||||
You can control the schedule using the `-backupInterval` and `-backupScheduleTimezone` command-line flags.
|
||||
The `-backupScheduleTimezone` flag specifies the timezone to use for scheduling daily, weekly, and monthly backups.
|
||||
The `vmbackupmanager` takes backups every hour if hourly backups are not disabled; otherwise,
|
||||
it defaults to taking backups every 24 hours at 00:00 in UTC timezone.
|
||||
You can control the schedule using the `-backupInterval` and `-backupScheduleTimezone` command-line flags.
|
||||
The `-backupScheduleTimezone` flag specifies the timezone to use for scheduling daily, weekly, and monthly backups.
|
||||
Note that overriding `-backupInterval` means daily, weekly, and monthly backups will be taken at specified intervals
|
||||
and not daily at midnight.
|
||||
|
||||
For example:
|
||||
- if you want to take backups three times per day, set `-backupInterval=8h`
|
||||
- if you want to take backups daily at midnight in `Europe/Paris` timezone, set `-backupScheduleTimezone="Europe/Paris"`
|
||||
|
||||
* if you want to take backups three times per day, set `-backupInterval=8h`
|
||||
* if you want to take backups daily at midnight in `Europe/Paris` timezone, set `-backupScheduleTimezone="Europe/Paris"`
|
||||
|
||||
To get the full list of supported flags please run the following command:
|
||||
|
||||
@@ -156,9 +157,11 @@ Backup retention policy is controlled by:
|
||||
* `-keepLastWeekly` - keep the last N weekly backups. Disabled by default
|
||||
* `-keepLastMonthly` - keep the last N monthly backups. Disabled by default
|
||||
|
||||
> *Note*: 0 value in every keepLast flag results into deletion of ALL backups for particular type (hourly, daily, weekly and monthly)
|
||||
> `0` value in every `keepLast*` flag results in deletion of ALL backups for the particular type (hourly, daily, weekly and monthly)
|
||||
|
||||
> *Note*: retention policy does not enforce removing previous versions of objects in object storages such if versioning is enabled. See [these docs](https://docs.victoriametrics.com/victoriametrics/vmbackup/#permanent-deletion-of-objects-in-s3-compatible-storages) for more details.
|
||||
> Retention policy does not enforce removing previous versions of objects in object storages if versioning is enabled. See [permanent deletion of objects in s3 compatible-storages](https://docs.victoriametrics.com/victoriametrics/vmbackup/#permanent-deletion-of-objects-in-s3-compatible-storages) for more details.
|
||||
|
||||
> It is possible to enforce retention by using object storage lifecycle rules. Please, see [retention by using object storage lifecycle rules](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/#retention-by-using-object-storage-lifecycle-rules) for more details.
|
||||
|
||||
Let’s assume we have a backup manager collecting daily backups for the past 10 days.
|
||||
|
||||
@@ -187,7 +190,16 @@ info app/vmbackupmanager/retention.go:106 daily backups to delete [daily/2
|
||||
|
||||
The result on the GCS bucket. We see only 3 daily backups:
|
||||
|
||||
[retention policy daily after retention cycle](vmbackupmanager_rp_daily_2.webp "retention policy daily after retention cycle")
|
||||

|
||||
|
||||
#### Retention by using object storage lifecycle rules
|
||||
|
||||
It is possible to enforce retention by using [object storage lifecycle rules](https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-lifecycle-mgmt.html).
|
||||
In order to do that it is required not to use `keepLast*` flags in `vmbackupmanager` and configure lifecycle rules
|
||||
in your object storage to remove objects under `/hourly/`, `/daily/`, `/weekly/` and `/monthly/` prefixes.
|
||||
|
||||
Note that `/latest/` prefix must be excluded from lifecycle rules as it saves files with original modification time.
|
||||
Otherwise, files under the `/latest/` prefix would be removed by lifecycle rules once they are older than the age specified in the rules.
|
||||
|
||||
#### Protection backups against deletion by retention policy
|
||||
|
||||
@@ -221,40 +233,48 @@ For example:
|
||||
* POST `/api/v1/backups` - schedule/create the backup. Response examples:
|
||||
|
||||
success, status code - 201, body:
|
||||
|
||||
```json
|
||||
{}
|
||||
```
|
||||
|
||||
failure, status code - 400, body:
|
||||
|
||||
```json
|
||||
{"error": "backups <name> is in progress"}
|
||||
```
|
||||
|
||||
* GET `/api/v1/backups` - returns list of backups in remote storage.
|
||||
Response example:
|
||||
|
||||
```json
|
||||
[{"name":"daily/2023-04-07","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:07+00:00"},{"name":"hourly/2023-04-07:11","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:06+00:00"},{"name":"latest","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:04+00:00"},{"name":"monthly/2023-04","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:10+00:00"},{"name":"weekly/2023-14","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:09+00:00"}]
|
||||
```
|
||||
> Note: `created_at` field is in RFC3339 format.
|
||||
|
||||
> `created_at` field is in RFC3339 format.
|
||||
|
||||
* PUT `/api/v1/backups/<BACKUP_NAME>` - update "locked" attribute for backup by name.
|
||||
Example request body:
|
||||
|
||||
```json
|
||||
{"locked":true}
|
||||
```
|
||||
|
||||
Example response:
|
||||
|
||||
```json
|
||||
{"name":"daily/2023-04-07","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:07+00:00", "locked": true, "state": "incomplete"}
|
||||
```
|
||||
|
||||
* POST `/api/v1/restore` - saves backup name to restore when [performing restore](#restore-commands).
|
||||
Example request body:
|
||||
|
||||
```json
|
||||
{"backup":"daily/2022-10-06"}
|
||||
```
|
||||
|
||||
* GET `/api/v1/restore` - returns backup name from restore mark if it exists.
|
||||
Example response:
|
||||
|
||||
```json
|
||||
{"backup":"daily/2022-10-06"}
|
||||
```
|
||||
@@ -266,10 +286,11 @@ For example:
|
||||
`vmbackupmanager` exposes CLI commands to work with [API methods](#api-methods) without external dependencies.
|
||||
|
||||
Supported commands:
|
||||
```sh
|
||||
vmbackupmanager backup
|
||||
|
||||
vmbackupmanager backup list
|
||||
```sh
|
||||
vmbackupmanager backup
|
||||
|
||||
vmbackupmanager backup list
|
||||
List backups in remote storage
|
||||
|
||||
vmbackupmanager backup lock
|
||||
@@ -278,13 +299,13 @@ vmbackupmanager backup
|
||||
vmbackupmanager backup unlock
|
||||
Unlocks backup in remote storage for deletion
|
||||
|
||||
vmbackupmanager restore
|
||||
Restore backup specified by restore mark if it exists
|
||||
vmbackupmanager restore
|
||||
Restore backup specified by restore mark if it exists
|
||||
|
||||
vmbackupmanager restore get
|
||||
vmbackupmanager restore get
|
||||
Get restore mark if it exists
|
||||
|
||||
vmbackupmanager restore delete
|
||||
vmbackupmanager restore delete
|
||||
Delete restore mark if it exists
|
||||
|
||||
vmbackupmanager restore create [backup_name]
|
||||
@@ -293,6 +314,7 @@ vmbackupmanager restore
|
||||
|
||||
By default, CLI commands are using `http://127.0.0.1:8300` endpoint to reach `vmbackupmanager` API.
|
||||
It can be changed by using flag:
|
||||
|
||||
```
|
||||
-apiURL string
|
||||
vmbackupmanager address to perform API requests (default "http://127.0.0.1:8300")
|
||||
@@ -301,8 +323,9 @@ It can be changed by using flag:
|
||||
#### Backup commands
|
||||
|
||||
`vmbackupmanager backup list` lists backups in remote storage:
|
||||
|
||||
```sh
|
||||
$ ./vmbackupmanager backup list
|
||||
./vmbackupmanager backup list
|
||||
[{"name":"latest","size_bytes":466996,"size":"456.1ki","created_at":"2025-04-25T15:48:49Z","locked":false,"state":"complete"},{"name":"weekly/2025-17","size_bytes":466996,"size":"456.1ki","created_at":"2025-04-25T15:48:49Z","locked":false,"state":"complete"}]
|
||||
```
|
||||
|
||||
@@ -311,27 +334,31 @@ $ ./vmbackupmanager backup list
|
||||
Restore commands are used to create, get and delete restore mark.
|
||||
Restore mark is used by `vmbackupmanager` to store backup name to restore when running restore.
|
||||
|
||||
|
||||
Create restore mark:
|
||||
|
||||
```sh
|
||||
$ ./vmbackupmanager restore create daily/2022-10-06
|
||||
./vmbackupmanager restore create daily/2022-10-06
|
||||
```
|
||||
|
||||
Get restore mark if it exists:
|
||||
|
||||
```sh
|
||||
$ ./vmbackupmanager restore get
|
||||
./vmbackupmanager restore get
|
||||
{"backup":"daily/2022-10-06"}
|
||||
```
|
||||
|
||||
Delete restore mark if it exists:
|
||||
|
||||
```sh
|
||||
$ ./vmbackupmanager restore delete
|
||||
./vmbackupmanager restore delete
|
||||
```
|
||||
|
||||
Perform restore:
|
||||
|
||||
```sh
|
||||
$ /vmbackupmanager-prod restore -dst=gs://vmstorage-data/$NODE_IP -credsFilePath=credentials.json -storageDataPath=/vmstorage-data
|
||||
./vmbackupmanager-prod restore -dst=gs://vmstorage-data/$NODE_IP -credsFilePath=credentials.json -storageDataPath=/vmstorage-data
|
||||
```
|
||||
|
||||
Note that `vmsingle` or `vmstorage` should be stopped before performing restore.
|
||||
|
||||
If restore mark doesn't exist at `storageDataPath` (restore wasn't requested), `vmbackupmanager restore` will exit with a successful status code.
|
||||
@@ -339,53 +366,68 @@ If restore mark doesn't exist at `storageDataPath`(restore wasn't requested) `vm
|
||||
### How to restore backup via CLI
|
||||
|
||||
1. Run `vmbackupmanager backup list` to get list of available backups:
|
||||
|
||||
```sh
|
||||
$ /vmbackupmanager-prod backup list
|
||||
./vmbackupmanager-prod backup list
|
||||
[{"name":"latest","size_bytes":466996,"size":"456.1ki","created_at":"2025-04-25T15:48:49Z","locked":false,"state":"complete"},{"name":"weekly/2025-17","size_bytes":466996,"size":"456.1ki","created_at":"2025-04-25T15:48:49Z","locked":false,"state":"complete"}]
|
||||
```
|
||||
|
||||
1. Run `vmbackupmanager restore create` to create restore mark:
|
||||
- Use relative path to backup to restore from currently used remote storage:
|
||||
* Use relative path to backup to restore from currently used remote storage:
|
||||
|
||||
```sh
|
||||
$ /vmbackupmanager-prod restore create weekly/2025-17
|
||||
./vmbackupmanager-prod restore create weekly/2025-17
|
||||
```
|
||||
- Use full path to backup to restore from any remote storage:
|
||||
|
||||
* Use full path to backup to restore from any remote storage:
|
||||
|
||||
```sh
|
||||
$ /vmbackupmanager-prod restore create azblob://test1/vmbackupmanager/weekly/2025-17
|
||||
./vmbackupmanager-prod restore create azblob://test1/vmbackupmanager/weekly/2025-17
|
||||
```
|
||||
|
||||
1. Stop `vmstorage` or `vmsingle` node
|
||||
1. Run `vmbackupmanager restore` to restore backup:
|
||||
```sh
|
||||
$ /vmbackupmanager-prod restore -credsFilePath=credentials.json -storageDataPath=/vmstorage-data
|
||||
```
|
||||
1. Start `vmstorage` or `vmsingle` node
|
||||
|
||||
```sh
|
||||
./vmbackupmanager-prod restore -credsFilePath=credentials.json -storageDataPath=/vmstorage-data
|
||||
```
|
||||
|
||||
1. Start `vmstorage` or `vmsingle` node
|
||||
|
||||
#### How to restore in Kubernetes
|
||||
|
||||
1. Ensure there is an init container with `vmbackupmanager restore` in `vmstorage` or `vmsingle` pod.
|
||||
For [VictoriaMetrics operator](https://docs.victoriametrics.com/operator/) deployments it is required to add:
|
||||
|
||||
```yaml
|
||||
vmbackup:
|
||||
restore:
|
||||
onStart:
|
||||
enabled: "true"
|
||||
```
|
||||
See operator `VMStorage` schema [here](https://docs.victoriametrics.com/operator/api/#vmstorage) and `VMSingle` [here](https://docs.victoriametrics.com/operator/api/#vmsinglespec).
|
||||
See operator [VMStorage](https://docs.victoriametrics.com/operator/api/#vmstorage) and [VMSingle](https://docs.victoriametrics.com/operator/api/#vmsinglespec) specs.
|
||||
1. Enter container running `vmbackupmanager`
|
||||
1. Use `vmbackupmanager backup list` to get list of available backups:
|
||||
|
||||
```sh
|
||||
$ /vmbackupmanager-prod backup list
|
||||
./vmbackupmanager-prod backup list
|
||||
[{"name":"latest","size_bytes":466996,"size":"456.1ki","created_at":"2025-04-25T15:48:49Z","locked":false,"state":"complete"},{"name":"weekly/2025-17","size_bytes":466996,"size":"456.1ki","created_at":"2025-04-25T15:48:49Z","locked":false,"state":"complete"}]
|
||||
```
|
||||
|
||||
1. Use `vmbackupmanager restore create` to create restore mark:
|
||||
- Use relative path to backup to restore from currently used remote storage:
|
||||
|
||||
* Use relative path to backup to restore from currently used remote storage:
|
||||
|
||||
```sh
|
||||
$ /vmbackupmanager-prod restore create weekly/2025-17
|
||||
./vmbackupmanager-prod restore create weekly/2025-17
|
||||
```
|
||||
- Use full path to backup to restore from any remote storage:
|
||||
|
||||
* Use full path to backup to restore from any remote storage:
|
||||
|
||||
```sh
|
||||
$ /vmbackupmanager-prod restore create azblob://test1/vmbackupmanager/weekly/2025-17
|
||||
./vmbackupmanager-prod restore create azblob://test1/vmbackupmanager/weekly/2025-17
|
||||
```
|
||||
|
||||
1. Restart pod
|
||||
|
||||
##### Restore cluster into another cluster
|
||||
@@ -394,38 +436,41 @@ These steps are assuming that [VictoriaMetrics operator](https://docs.victoriame
|
||||
Clusters here are referred to as `source` and `destination`.
|
||||
|
||||
1. Create a new cluster with access to *source* cluster `vmbackupmanager` storage and same number of storage nodes.
|
||||
Add the following section in order to enable restore on start (operator `VMStorage` schema can be found [here](https://docs.victoriametrics.com/operator/api/#vmstorage):
|
||||
Add the following section to enable restore on start (see operator's [VMStorage spec](https://docs.victoriametrics.com/operator/api/#vmstorage)):
|
||||
```yaml
|
||||
vmbackup:
|
||||
restore:
|
||||
onStart:
|
||||
enabled: "true"
|
||||
```
|
||||
|
||||
Note: it is safe to leave this section in the cluster configuration, since it will be ignored if restore mark doesn't exist.
|
||||
> Important! Use different `-dst` for *destination* cluster to avoid overwriting backup data of the *source* cluster.
|
||||
1. Enter container running `vmbackupmanager` in *source* cluster
|
||||
1. Use `vmbackupmanager backup list` to get list of available backups:
|
||||
|
||||
```sh
|
||||
$ /vmbackupmanager-prod backup list
|
||||
./vmbackupmanager-prod backup list
|
||||
[{"name":"latest","size_bytes":466996,"size":"456.1ki","created_at":"2025-04-25T15:48:49Z","locked":false,"state":"complete"},{"name":"weekly/2025-17","size_bytes":466996,"size":"456.1ki","created_at":"2025-04-25T15:48:49Z","locked":false,"state":"complete"}]
|
||||
```
|
||||
|
||||
1. Use `vmbackupmanager restore create` to create restore mark at each pod of the *destination* cluster.
|
||||
Each pod in *destination* cluster should be restored from backup of respective pod in *source* cluster.
|
||||
For example: `vmstorage-destination-0` in *destination* cluster should be restored from the backup of `vmstorage-source-0` in *source* cluster.
|
||||
|
||||
```sh
|
||||
$ /vmbackupmanager-prod restore create s3://source_cluster/vmstorage-source-0/weekly/2025-17
|
||||
./vmbackupmanager-prod restore create s3://source_cluster/vmstorage-source-0/weekly/2025-17
|
||||
```
|
||||
|
||||
1. Restart `vmstorage` pods of *destination* cluster. On pod start `vmbackupmanager` will restore data from the specified backup.
|
||||
|
||||
### Monitoring
|
||||
|
||||
`vmbackupmanager` exports various metrics in Prometheus exposition format at `http://vmbackupmanager:8300/metrics` page. It is recommended setting up regular scraping of this page
|
||||
either via [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) or via Prometheus, so the exported metrics could be analyzed later.
|
||||
`vmbackupmanager` exports various metrics in Prometheus exposition format at `http://vmbackupmanager:8300/metrics` page. It is recommended to set up regular scraping of this page either via [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) or via Prometheus, so the exported metrics could be analyzed later.
|
||||
|
||||
Use the official [Grafana dashboard](https://grafana.com/grafana/dashboards/17798) for `vmbackupmanager` overview.
|
||||
Graphs on this dashboard contain useful hints - hover over the `i` icon in the top left corner of each graph in order to read them.
|
||||
If you have suggestions for improvements or have found a bug - please open an issue on github or add
|
||||
a review to the dashboard.
|
||||
If you have suggestions for improvements or have found a bug - please open an issue on GitHub or add a review to the dashboard.
|
||||
|
||||
### Configuration
|
||||
|
||||
|
||||
@@ -34,9 +34,9 @@ vmctl command-line tool is available as:
|
||||
|
||||
Download and unpack vmctl:
|
||||
```sh
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.125.1/vmutils-darwin-arm64-v1.125.1.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.126.0/vmutils-darwin-arm64-v1.126.0.tar.gz
|
||||
|
||||
tar xzf vmutils-darwin-arm64-v1.125.1.tar.gz
|
||||
tar xzf vmutils-darwin-arm64-v1.126.0.tar.gz
|
||||
```
|
||||
|
||||
Once binary is unpacked, see the full list of supported modes by running the following command:
|
||||
|
||||
@@ -13,9 +13,9 @@ aliases:
|
||||
- /vmgateway/index.html
|
||||
- /vmgateway/
|
||||
---
|
||||
***vmgateway is a part of [enterprise package](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
||||
***vmgateway is a part of [enterprise package](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
||||
It is available for download and evaluation at [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
|
||||
See how to request a free trial license [here](https://victoriametrics.com/products/enterprise/trial/).***
|
||||
See how to request a free [trial license](https://victoriametrics.com/products/enterprise/trial/).***
|
||||
|
||||

|
||||
|
||||
@@ -39,6 +39,7 @@ See how to request a free trial license [here](https://victoriametrics.com/produ
|
||||
jwt token must be in one of the following formats:
|
||||
|
||||
with `vm_access` claim as JSON object
|
||||
|
||||
```json
|
||||
{
|
||||
"exp": 1617304574,
|
||||
@@ -46,6 +47,7 @@ with `vm_access` claim as JSON object
|
||||
"tenant_id": {
|
||||
"account_id": 1,
|
||||
"project_id": 5
|
||||
|
||||
},
|
||||
"extra_labels": {
|
||||
"team": "dev",
|
||||
@@ -203,12 +205,14 @@ Tokens with unsupported algorithms will be rejected.
|
||||
|
||||
In order to enable JWT signature verification, you need to specify keys for signature verification.
|
||||
The following flags are used to specify keys:
|
||||
- `-auth.publicKeyFiles` - allows to pass file path to file with public key.
|
||||
- `-auth.publicKeys` - allows to pass public key directly.
|
||||
|
||||
* `-auth.publicKeyFiles` - allows passing the path to a file with a public key.
|
||||
* `-auth.publicKeys` - allows passing a public key directly.
|
||||
|
||||
Note that both flags support passing multiple keys and also can be used together.
|
||||
|
||||
Example usage:
|
||||
|
||||
```sh
|
||||
./bin/vmgateway -licenseFile=/path/to/vm-license \
|
||||
-enable.auth \
|
||||
@@ -227,6 +231,7 @@ mwIDAQAB
|
||||
-----END PUBLIC KEY-----
|
||||
`
|
||||
```
|
||||
|
||||
This command will result in 3 keys loaded: 2 keys from files and 1 from command line.
|
||||
|
||||
### Using OpenID discovery endpoint for JWT signature verification
|
||||
@@ -237,6 +242,7 @@ In order to enable [OpenID discovery](https://openid.net/specs/openid-connect-di
|
||||
When `auth.oidcDiscoveryEndpoints` is specified, `vmgateway` will fetch JWKS keys from the specified endpoint and use them for JWT signature verification.
|
||||
|
||||
Example usage for tokens issued by Azure Active Directory:
|
||||
|
||||
```sh
|
||||
/bin/vmgateway -licenseFile=/path/to/vm-license \
|
||||
-enable.auth \
|
||||
@@ -246,6 +252,7 @@ Example usage for tokens issued by Azure Active Directory:
|
||||
```
|
||||
|
||||
Example usage for tokens issued by Google:
|
||||
|
||||
```sh
|
||||
/bin/vmgateway -licenseFile=/path/to/vm-license \
|
||||
-enable.auth \
|
||||
@@ -262,6 +269,7 @@ In order to enable JWKS endpoint for JWT signature verification, you need to spe
|
||||
When `auth.jwksEndpoints` is specified, `vmgateway` will fetch public keys from the specified endpoint and use them for JWT signature verification.
|
||||
|
||||
Example usage for tokens issued by Azure Active Directory:
|
||||
|
||||
```sh
|
||||
/bin/vmgateway -licenseFile=/path/to/vm-license \
|
||||
-enable.auth \
|
||||
@@ -271,6 +279,7 @@ Example usage for tokens issued by Azure Active Directory:
|
||||
```
|
||||
|
||||
Example usage for tokens issued by Google:
|
||||
|
||||
```sh
|
||||
/bin/vmgateway -licenseFile=/path/to/vm-license \
|
||||
-enable.auth \
|
||||
|
||||
@@ -6,7 +6,7 @@ build:
|
||||
sitemap:
|
||||
disable: true
|
||||
---
|
||||
<!-- The file has to be manually updated during feature work in PR, make docs-update-flags command could be used peridically to ensure the flags in sync. -->
|
||||
<!-- The file has to be manually updated during feature work in a PR; the `make docs-update-flags` command can be used periodically to ensure the flags are in sync. -->
|
||||
```shellhelp
|
||||
|
||||
vminsert accepts data via popular data ingestion protocols and routes it to vmstorage nodes configured via -storageNode.
|
||||
|
||||
@@ -14,8 +14,7 @@ aliases:
|
||||
---
|
||||
`vmrestore` restores data from backups created by [vmbackup](https://docs.victoriametrics.com/victoriametrics/vmbackup/).
|
||||
|
||||
Restore process can be interrupted at any time. It is automatically resumed from the interruption point
|
||||
when restarting `vmrestore` with the same args.
|
||||
Restore process can be interrupted at any time. It is automatically resumed from the interruption point when restarting `vmrestore` with the same args.
|
||||
|
||||
## Usage
|
||||
|
||||
@@ -42,16 +41,15 @@ Run the following command to restore backup from the given `-src` into the given
|
||||
The original `-storageDataPath` directory may contain old files. They will be substituted by the files from backup,
|
||||
i.e. the end result would be similar to [rsync --delete](https://askubuntu.com/questions/476041/how-do-i-make-rsync-delete-files-that-have-been-deleted-from-the-source-folder).
|
||||
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
* See [how to set up credentials via environment variables](https://docs.victoriametrics.com/victoriametrics/vmbackup/#providing-credentials-via-env-variables).
|
||||
* If `vmrestore` eats all the network bandwidth, then set `-maxBytesPerSecond` to the desired value.
|
||||
* If `vmrestore` consumes all the network bandwidth, then set `-maxBytesPerSecond` to the desired value.
|
||||
* If `vmrestore` has been interrupted due to a temporary error, then just restart it with the same args. It will resume the restore process.
|
||||
|
||||
## Advanced usage
|
||||
|
||||
Please, see [vmbackup docs](https://docs.victoriametrics.com/victoriametrics/vmbackup/#advanced-usage) for examples of authentication
|
||||
Please, see [vmbackup docs](https://docs.victoriametrics.com/victoriametrics/vmbackup/#advanced-usage) for examples of authentication
|
||||
with different storage types.
|
||||
|
||||
Run `vmrestore -help` in order to see all the available options:
|
||||
@@ -235,13 +233,13 @@ Run `vmrestore -help` in order to see all the available options:
|
||||
|
||||
## How to build from sources
|
||||
|
||||
It is recommended using [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) - see `vmutils-*` archives there.
|
||||
It is recommended to use the [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) - see `vmutils-*` archives there.
|
||||
|
||||
### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install).
|
||||
1. Run `make vmrestore` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmrestore` binary and puts it into the `bin` folder.
|
||||
It builds the `vmrestore` binary and places it into the `bin` folder.
|
||||
|
||||
### Production build
|
||||
|
||||
@@ -255,8 +253,7 @@ Run `make package-vmrestore`. It builds `victoriametrics/vmrestore:<PKG_TAG>` do
|
||||
`<PKG_TAG>` is an auto-generated image tag, which depends on the source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmrestore`.
|
||||
|
||||
The base docker image is [alpine](https://hub.docker.com/_/alpine) but it is possible to use any other base image
|
||||
by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of [scratch](https://hub.docker.com/_/scratch) image:
|
||||
The base Docker image is [alpine](https://hub.docker.com/_/alpine), but it is possible to use any other base image by setting it via the `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of the [scratch](https://hub.docker.com/_/scratch) image:
|
||||
|
||||
```sh
|
||||
ROOT_IMAGE=scratch make package-vmrestore
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user