mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-05-28 14:07:06 +03:00
Compare commits
7 Commits
issue-1098
...
move-stora
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3a0c6e0ce5 | ||
|
|
255365db50 | ||
|
|
4065fce536 | ||
|
|
08d4273d22 | ||
|
|
ef83198eb1 | ||
|
|
f61b632469 | ||
|
|
e4c7b557fd |
@@ -22,7 +22,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -30,21 +29,11 @@ var (
|
||||
useProxyProtocol = flagutil.NewArrayBool("httpListenAddr.useProxyProtocol", "Whether to use proxy protocol for connections accepted at the corresponding -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||
minScrapeInterval = flag.Duration("dedup.minScrapeInterval", 0, "Leave only the last sample in every time series per each discrete interval "+
|
||||
"equal to -dedup.minScrapeInterval > 0. See also -streamAggr.dedupInterval and https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication")
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check config files without running VictoriaMetrics. The following config files are checked: "+
|
||||
"-promscrape.config, -relabelConfig and -streamAggr.config. Unknown config entries aren't allowed in -promscrape.config by default. "+
|
||||
"This can be changed with -promscrape.config.strictParse=false command-line flag")
|
||||
inmemoryDataFlushInterval = flag.Duration("inmemoryDataFlushInterval", 5*time.Second, "The interval for guaranteed saving of in-memory data to disk. "+
|
||||
"The saved data survives unclean shutdowns such as OOM crash, hardware reset, SIGKILL, etc. "+
|
||||
"Bigger intervals may help increase the lifetime of flash storage with limited write cycles (e.g. Raspberry PI). "+
|
||||
"Smaller intervals increase disk IO load. Minimum supported value is 1s")
|
||||
maxIngestionRate = flag.Int("maxIngestionRate", 0, "The maximum number of samples vmsingle can receive per second. Data ingestion is paused when the limit is exceeded. "+
|
||||
"By default there are no limits on samples ingestion rate.")
|
||||
finalDedupScheduleInterval = flag.Duration("storage.finalDedupScheduleCheckInterval", time.Hour, "The interval for checking when final deduplication process should be started."+
|
||||
"Storage unconditionally adds 25% jitter to the interval value on each check evaluation."+
|
||||
" Changing the interval to the bigger values may delay downsampling, deduplication for historical data."+
|
||||
" See also https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication")
|
||||
)
|
||||
|
||||
func main() {
|
||||
@@ -87,12 +76,6 @@ func main() {
|
||||
}
|
||||
logger.Infof("starting VictoriaMetrics at %q...", listenAddrs)
|
||||
startTime := time.Now()
|
||||
storage.SetDedupInterval(*minScrapeInterval)
|
||||
storage.SetDataFlushInterval(*inmemoryDataFlushInterval)
|
||||
if *finalDedupScheduleInterval < time.Hour {
|
||||
logger.Fatalf("-dedup.finalDedupScheduleCheckInterval cannot be smaller than 1 hour; got %s", *finalDedupScheduleInterval)
|
||||
}
|
||||
storage.SetFinalDedupScheduleInterval(*finalDedupScheduleInterval)
|
||||
vmstorage.Init(promql.ResetRollupResultCacheIfNeeded)
|
||||
vmselect.Init()
|
||||
vminsertcommon.StartIngestionRateLimiter(*maxIngestionRate)
|
||||
|
||||
@@ -113,15 +113,15 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
|
||||
// because correct types must be inherited after unmarshalling.
|
||||
exprValidator := g.Type.ValidateExpr
|
||||
if err := exprValidator(r.Expr); err != nil {
|
||||
return fmt.Errorf("invalid expression for rule %q: %w", ruleName, err)
|
||||
return fmt.Errorf("invalid expression for rule %q: %w", ruleName, err)
|
||||
}
|
||||
}
|
||||
if validateTplFn != nil {
|
||||
if err := validateTplFn(r.Annotations); err != nil {
|
||||
return fmt.Errorf("invalid annotations for rule %q: %w", ruleName, err)
|
||||
return fmt.Errorf("invalid annotations for rule %q: %w", ruleName, err)
|
||||
}
|
||||
if err := validateTplFn(r.Labels); err != nil {
|
||||
return fmt.Errorf("invalid labels for rule %q: %w", ruleName, err)
|
||||
return fmt.Errorf("invalid labels for rule %q: %w", ruleName, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -121,7 +121,7 @@ func TestParse_Failure(t *testing.T) {
|
||||
f([]string{"testdata/dir/rules2-bad.rules"}, "function \"unknown\" not defined")
|
||||
f([]string{"testdata/dir/rules3-bad.rules"}, "either `record` or `alert` must be set")
|
||||
f([]string{"testdata/dir/rules4-bad.rules"}, "either `record` or `alert` must be set")
|
||||
f([]string{"testdata/rules/rules1-bad.rules"}, "bad graphite expr")
|
||||
f([]string{"testdata/rules/rules1-bad.rules"}, "bad GraphiteQL expr")
|
||||
f([]string{"testdata/rules/vlog-rules0-bad.rules"}, "bad LogsQL expr")
|
||||
f([]string{"testdata/dir/rules6-bad.rules"}, "missing ':' in header")
|
||||
f([]string{"testdata/rules/rules-multi-doc-bad.rules"}, "unknown fields")
|
||||
@@ -283,7 +283,7 @@ func TestGroupValidate_Failure(t *testing.T) {
|
||||
Expr: "up | 0",
|
||||
},
|
||||
},
|
||||
}, true, "bad prometheus expr")
|
||||
}, true, "bad MetricsQL expr")
|
||||
|
||||
f(&Group{
|
||||
Name: "test graphite expr",
|
||||
@@ -293,7 +293,7 @@ func TestGroupValidate_Failure(t *testing.T) {
|
||||
"description": "some-description",
|
||||
}},
|
||||
},
|
||||
}, true, "bad graphite expr")
|
||||
}, true, "bad GraphiteQL expr")
|
||||
|
||||
f(&Group{
|
||||
Name: "test vlogs expr",
|
||||
@@ -327,7 +327,7 @@ func TestGroupValidate_Failure(t *testing.T) {
|
||||
Expr: "sum(up == 0 ) by (host)",
|
||||
},
|
||||
},
|
||||
}, true, "bad graphite expr")
|
||||
}, true, "bad GraphiteQL expr")
|
||||
|
||||
f(&Group{
|
||||
Name: "test vlogs with prometheus exp",
|
||||
@@ -351,7 +351,7 @@ func TestGroupValidate_Failure(t *testing.T) {
|
||||
For: promutil.NewDuration(10 * time.Millisecond),
|
||||
},
|
||||
},
|
||||
}, true, "bad prometheus expr")
|
||||
}, true, "bad MetricsQL expr")
|
||||
}
|
||||
|
||||
func TestGroupValidate_Success(t *testing.T) {
|
||||
|
||||
@@ -66,11 +66,11 @@ func (t *Type) ValidateExpr(expr string) error {
|
||||
switch t.String() {
|
||||
case "graphite":
|
||||
if _, err := graphiteql.Parse(expr); err != nil {
|
||||
return fmt.Errorf("bad graphite expr: %q, err: %w", expr, err)
|
||||
return fmt.Errorf("bad GraphiteQL expr: %q, err: %w", expr, err)
|
||||
}
|
||||
case "prometheus":
|
||||
if _, err := metricsql.Parse(expr); err != nil {
|
||||
return fmt.Errorf("bad prometheus expr: %q, err: %w", expr, err)
|
||||
return fmt.Errorf("bad MetricsQL expr: %q, err: %w", expr, err)
|
||||
}
|
||||
case "vlogs":
|
||||
q, err := logstorage.ParseStatsQuery(expr, 0)
|
||||
|
||||
@@ -51,6 +51,12 @@ var (
|
||||
retentionTimezoneOffset = flag.Duration("retentionTimezoneOffset", 0, "The offset for performing indexdb rotation. "+
|
||||
"If set to 0, then the indexdb rotation is performed at 4am UTC time per each -retentionPeriod. "+
|
||||
"If set to 2h, then the indexdb rotation is performed at 4am EET time (the timezone with +2h offset)")
|
||||
minScrapeInterval = flag.Duration("dedup.minScrapeInterval", 0, "Leave only the last sample in every time series per each discrete interval "+
|
||||
"equal to -dedup.minScrapeInterval > 0. See also -streamAggr.dedupInterval and https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication")
|
||||
inmemoryDataFlushInterval = flag.Duration("inmemoryDataFlushInterval", 5*time.Second, "The interval for guaranteed saving of in-memory data to disk. "+
|
||||
"The saved data survives unclean shutdowns such as OOM crash, hardware reset, SIGKILL, etc. "+
|
||||
"Bigger intervals may help increase the lifetime of flash storage with limited write cycles (e.g. Raspberry PI). "+
|
||||
"Smaller intervals increase disk IO load. Minimum supported value is 1s")
|
||||
|
||||
logNewSeries = flag.Bool("logNewSeries", false, "Whether to log new series. This option is for debug purposes only. It can lead to performance issues "+
|
||||
"when big number of new series are ingested into VictoriaMetrics")
|
||||
@@ -68,6 +74,11 @@ var (
|
||||
|
||||
minFreeDiskSpaceBytes = flagutil.NewBytes("storage.minFreeDiskSpaceBytes", 100e6, "The minimum free disk space at -storageDataPath after which the storage stops accepting new data")
|
||||
|
||||
finalDedupScheduleInterval = flag.Duration("storage.finalDedupScheduleCheckInterval", time.Hour, "The interval for checking when final deduplication process should be started."+
|
||||
"Storage unconditionally adds 25% jitter to the interval value on each check evaluation."+
|
||||
" Changing the interval to the bigger values may delay downsampling, deduplication for historical data."+
|
||||
" See also https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication")
|
||||
|
||||
cacheSizeStorageTSID = flagutil.NewBytes("storage.cacheSizeStorageTSID", 0, "Overrides max size for storage/tsid cache. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cache-tuning")
|
||||
cacheSizeStorageMetricName = flagutil.NewBytes("storage.cacheSizeStorageMetricName", 0, "Overrides max size for storage/metricName cache. "+
|
||||
@@ -111,11 +122,16 @@ func Init(resetCacheIfNeeded func(mrs []storage.MetricRow)) {
|
||||
logger.Fatalf("invalid `-precisionBits`: %s", err)
|
||||
}
|
||||
|
||||
resetResponseCacheIfNeeded = resetCacheIfNeeded
|
||||
storage.SetDedupInterval(*minScrapeInterval)
|
||||
storage.SetDataFlushInterval(*inmemoryDataFlushInterval)
|
||||
storage.LegacySetRetentionTimezoneOffset(*retentionTimezoneOffset)
|
||||
storage.SetFreeDiskSpaceLimit(minFreeDiskSpaceBytes.N)
|
||||
storage.SetTSIDCacheSize(cacheSizeStorageTSID.IntN())
|
||||
storage.SetTagFiltersCacheSize(cacheSizeIndexDBTagFilters.IntN())
|
||||
if *finalDedupScheduleInterval < time.Hour {
|
||||
logger.Fatalf("-storage.finalDedupScheduleCheckInterval cannot be smaller than 1 hour; got %s", *finalDedupScheduleInterval)
|
||||
}
|
||||
storage.SetFinalDedupScheduleInterval(*finalDedupScheduleInterval)
|
||||
storage.SetMetricNamesStatsCacheSize(cacheSizeMetricNamesStats.IntN())
|
||||
storage.SetMetricNameCacheSize(cacheSizeStorageMetricName.IntN())
|
||||
storage.SetMetadataStorageSize(metadataStorageSize.IntN())
|
||||
@@ -134,9 +150,9 @@ func Init(resetCacheIfNeeded func(mrs []storage.MetricRow)) {
|
||||
if *idbPrefillStart > 23*time.Hour {
|
||||
logger.Panicf("-storage.idbPrefillStart cannot exceed 23 hours; got %s", idbPrefillStart)
|
||||
}
|
||||
fs.RegisterPathFsMetrics(*storageDataPath)
|
||||
logger.Infof("opening storage at %q with -retentionPeriod=%s", *storageDataPath, retentionPeriod)
|
||||
startTime := time.Now()
|
||||
WG = syncwg.WaitGroup{}
|
||||
opts := storage.OpenOptions{
|
||||
Retention: retentionPeriod.Duration(),
|
||||
FutureRetention: futureRetention.Duration(),
|
||||
@@ -149,7 +165,6 @@ func Init(resetCacheIfNeeded func(mrs []storage.MetricRow)) {
|
||||
LogNewSeries: *logNewSeries,
|
||||
}
|
||||
strg := storage.MustOpenStorage(*storageDataPath, opts)
|
||||
Storage = strg
|
||||
initStaleSnapshotsRemover(strg)
|
||||
|
||||
var m storage.Metrics
|
||||
@@ -168,7 +183,10 @@ func Init(resetCacheIfNeeded func(mrs []storage.MetricRow)) {
|
||||
writeStorageMetrics(w, strg)
|
||||
})
|
||||
metrics.RegisterSet(storageMetrics)
|
||||
fs.RegisterPathFsMetrics(*storageDataPath)
|
||||
|
||||
WG = syncwg.WaitGroup{}
|
||||
resetResponseCacheIfNeeded = resetCacheIfNeeded
|
||||
Storage = strg
|
||||
}
|
||||
|
||||
var storageMetrics *metrics.Set
|
||||
@@ -340,6 +358,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
startTime := time.Now()
|
||||
if err := Storage.ForceMergePartitions(partitionNamePrefix); err != nil {
|
||||
logger.Errorf("error in forced merge for partition_prefix=%q: %s", partitionNamePrefix, err)
|
||||
return
|
||||
}
|
||||
logger.Infof("forced merge for partition_prefix=%q has been successfully finished in %.3f seconds", partitionNamePrefix, time.Since(startTime).Seconds())
|
||||
}()
|
||||
@@ -353,6 +372,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
Storage.DebugFlush()
|
||||
return true
|
||||
}
|
||||
|
||||
if path == "/internal/log_new_series" {
|
||||
if !httpserver.CheckAuthFlag(w, r, logNewSeriesAuthKey) {
|
||||
return true
|
||||
|
||||
@@ -88,6 +88,7 @@ These skills provide predefined workflows and capabilities such as:
|
||||
* Multi-signal investigations
|
||||
* Cardinality optimization
|
||||
* Unused metric detection
|
||||
* Stream aggregation configuration
|
||||
|
||||
To install the available skills for AI agents, run:
|
||||
```sh
|
||||
|
||||
@@ -150,8 +150,12 @@ You can experiment with your own data during the month‑long trial without depl
|
||||
are fast-booting Linux microVMs that run on a fleet of large bare-metal servers. You can start a playground right from your browser.
|
||||
Once up and running, accessing a playground is no different from SSH-ing into a remote server rented from your favorite VPS or Cloud provider.
|
||||
|
||||
Iximiuz Labs provides playgrounds for VictoriaMetrics software:
|
||||
- [VictoriaMetrics single node (on Ubuntu)](https://labs.iximiuz.com/playgrounds/victoriametrics-e2f9b613)
|
||||
- [VictoriaMetrics cluster (on Ubuntu)](https://labs.iximiuz.com/playgrounds/victoriametrics-cluster-8eacb19d)
|
||||
- [Getting Started with VictoriaMetrics on Kubernetes](https://labs.iximiuz.com/tutorials/victoriametrics-getting-started-kubernetes-0e9c0993)
|
||||
- [VictoriaMetrics Operator](https://labs.iximiuz.com/playgrounds/victoriametrics-kubernetes-9eebc258)
|
||||
Iximiuz Labs provides various [learning-by-doing resources for VictoriaMetrics](https://labs.iximiuz.com/v/victoriametrics-bb1fdaa1):
|
||||
- Tutorial:
|
||||
- [Getting Started with VictoriaMetrics on Kubernetes](https://labs.iximiuz.com/tutorials/victoriametrics-getting-started-kubernetes-0e9c0993)
|
||||
- Playgrounds:
|
||||
- [VictoriaMetrics single node](https://labs.iximiuz.com/playgrounds/victoriametrics-e2f9b613)
|
||||
- [VictoriaMetrics cluster](https://labs.iximiuz.com/playgrounds/victoriametrics-cluster-8eacb19d)
|
||||
- [VictoriaMetrics on Kubernetes](https://labs.iximiuz.com/playgrounds/victoriametrics-kubernetes-9eebc258)
|
||||
|
||||
Iximiuz Labs requires a [free account](https://labs.iximiuz.com/signup) to access the materials.
|
||||
@@ -43,6 +43,9 @@ Just download VictoriaMetrics and follow [these instructions](https://docs.victo
|
||||
See [available integrations](https://docs.victoriametrics.com/victoriametrics/integrations/) with other systems like
|
||||
[Prometheus](https://docs.victoriametrics.com/victoriametrics/integrations/prometheus/) or [Grafana](https://docs.victoriametrics.com/victoriametrics/integrations/grafana/).
|
||||
|
||||
> Want to see VictoriaMetrics in action, but without installing anything?
|
||||
> Try [Playgrounds](https://docs.victoriametrics.com/playgrounds/) - a list of publicly available playgrounds for VictoriaMetrics software.
|
||||
|
||||
VictoriaMetrics is developed at a fast pace, so it is recommended to periodically check the [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/)
|
||||
and perform [regular upgrades](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics).
|
||||
|
||||
|
||||
@@ -59,6 +59,10 @@ Once connected, you can build graphs and dashboards using [PromQL](https://prome
|
||||
_Creating a datasource may require [specific permissions](https://grafana.com/docs/grafana/latest/administration/data-source-management/).
|
||||
If you don't see an option to create a data source - try contacting system administrator._
|
||||
|
||||
If you run [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) and want to see its rules in [Grafana Alerting UI](https://grafana.com/docs/grafana/latest/alerting/),
|
||||
then set configure `-vmalert.proxyURL` on VictoriaMetrics [single-node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmalert)
|
||||
or [vmselect in cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#vmalert).
|
||||
|
||||
## Multi-tenant access with vmauth and OIDC
|
||||
|
||||
[vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/) can proxy Grafana datasource requests and enforce
|
||||
|
||||
@@ -26,6 +26,7 @@ Stream aggregation has the following features:
|
||||
and/or scraped from [Prometheus-compatible targets](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-scrape-prometheus-exporters-such-as-node-exporter)
|
||||
- It can filter out raw samples matched by aggregation rules, so raw data will never reach the remote destination. See `-streamAggr.keepInput` and `-streamAggr.dropInput` in [aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/);
|
||||
- It allows building [flexible processing pipelines](#routing);
|
||||
- It is [horizontally scalable](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#scaling-aggregation-horizontally).
|
||||
|
||||
# Limitations
|
||||
|
||||
@@ -598,6 +599,47 @@ Below is an example of an `aggr.yaml` configuration that drops the `replica` and
|
||||
keep_metric_names: true
|
||||
```
|
||||
|
||||
## Scaling aggregation horizontally
|
||||
|
||||
Aggregation output is only correct when all contributing samples are processed by the same aggregator instance.
|
||||
|
||||
To scale the aggregation horizontally, always shard the input samples in a deterministic way. This can be achieved by
|
||||
building a two layer topology of vmagents where the first layer is responsible for sharding, and the second layer is responsible for aggregating:
|
||||
```mermaid
|
||||
flowchart LR
|
||||
V1[vmagent-shard-1] -- requests_total{env=test, pod=foo} --> SV1[vmagent-aggr-1]
|
||||
V1[vmagent-shard-1] -- requests_total{env=prod, pod=bar} --> SV2[vmagent-aggr-1]
|
||||
V2[vmagent-shard-2] -- requests_total{env=prod, pod=baz} --> SV2[vmagent-aggr-2]
|
||||
SV1 -- requests_total:5m_without_pod_total{env=test} --> x(( ))
|
||||
SV2 -- requests_total:5m_without_pod_total{env=prod} --> y(( ))
|
||||
style x fill:none,stroke:none
|
||||
style y fill:none,stroke:none
|
||||
```
|
||||
|
||||
The sharding layer of vmagents can be configured via the `-remoteWrite.shardByURL.labels` or `-remoteWrite.shardByURL.ignoreLabels`
|
||||
command line flags. See how to [shard data across remote write destinations](https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages) for more details.
|
||||
|
||||
The following requirements must be met for sharded aggregation to work correctly:
|
||||
- All sharding vmagents should have the same deterministic sharding configuration.
|
||||
- The sharding configuration must align with the `by` and `without` lists:
|
||||
- Labels listed in `by` setting should be a subset of shard's routing key `-remoteWrite.shardByURL.labels`.
|
||||
With `-remoteWrite.shardByURL.labels=env,job` aggregator's `by` should include `by: env`, `by: job` or both: `by: [env, job]`.
|
||||
This makes sure that all the samples for the same `env` and `job` are aggregated together and produce the complete output.
|
||||
- Labels listed in `without` setting should be a superset of shard's routing key `--remoteWrite.shardByURL.ignoreLabels`.
|
||||
With `-remoteWrite.shardByURL.ignoreLabels=env,job` aggegator's `without` should include at least both labels `without: [env,job]`.
|
||||
This makes sure that `requests_total{env=test, job=foo}` and `requests_total{env=prod, job=foo}` are routed to the same aggregator
|
||||
and are aggregated together. See also [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5938#issuecomment-2018470324).
|
||||
- Aggregating vmagents should not produce collisions: the aggregation output should be unique across all the sharded agents.
|
||||
For example, `requests_total:5m_without_env_pod_total` produced by both `vmagent-aggr-1` and `vmagent-aggr-2` will collide
|
||||
unless they have labels uniquely identifying them. These labels should be either preserved during sharding and aggregation config,
|
||||
or enforced on the output via `-remoteWrite.label` - see [these docs](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#cluster-mode) for more details.
|
||||
|
||||
> Never shard histograms by `le` (or `vmrange` in case of VM histograms) label. A histogram is a logical group of series differing
|
||||
only in the bucket label. All of those buckets must land on the same aggregator at the same time so it can produce a
|
||||
coherent bucket set. See more about [aggregating histograms](https://docs.victoriametrics.com/stream-aggregation/#aggregating-histograms).
|
||||
|
||||
See also [why you shouldn't put an aggregator behind a load balancer](https://docs.victoriametrics.com/stream-aggregation/#put-aggregator-behind-load-balancer).
|
||||
|
||||
# Troubleshooting
|
||||
|
||||
- [Unexpected spikes for `total` or `increase` outputs](#staleness).
|
||||
|
||||
@@ -182,15 +182,15 @@ among remote storage systems specified in `-remoteWrite.url`.
|
||||
> For example, if you set `-remoteWrite.url=srv+foo` and it's resolved to three addresses (`192.168.1.1`, `192.168.1.2`, `192.168.1.3`),
|
||||
> vmagent will only choose **one** randomly every time it (re-)creates the connection. In contrast, specifying the addresses manually (`-remoteWrite.url=192.168.1.1 -remoteWrite.url=192.168.1.2 -remoteWrite.url=192.168.1.3`) will shard samples across all three URLs.
|
||||
|
||||
Sometimes, it may be necessary to use only a particular set of labels for sharding. For example, it may be necessary to route all the metrics with the same `instance` label
|
||||
to the same `-remoteWrite.url`. In this case, you can specify a comma-separated list of these labels in the `-remoteWrite.shardByURL.labels`
|
||||
command-line flag. For example, `-remoteWrite.shardByURL.labels=instance,__name__` would shard metrics with the same name and `instance`
|
||||
label to the same `-remoteWrite.url`.
|
||||
Use `-remoteWrite.shardByURL.labels` to route metrics among `-remoteWrite.url` based on their label values.
|
||||
For example, `-remoteWrite.shardByURL.labels=instance,__name__` would shard metrics with the same name and `instance`
|
||||
label to the same `-remoteWrite.url`. This command-line flag allows specifying a comma-separated list of labels.
|
||||
|
||||
Sometimes, it may be necessary to ignore some labels when sharding samples across multiple `-remoteWrite.url` backends.
|
||||
For example, if all the [raw samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples) with the same set of labels
|
||||
except for the labels `instance` and `pod` must be routed to the same backend. In this case the list of ignored labels must be passed to
|
||||
`-remoteWrite.shardByURL.ignoreLabels` command-line flag: `-remoteWrite.shardByURL.ignoreLabels=instance,pod`.
|
||||
Alternatively, you can use `-remoteWrite.shardByURL.ignoreLabels` to route metrics among `-remoteWrite.url` based on their label values, excluding the specified labels.
|
||||
For example, `-remoteWrite.shardByURL.ignoreLabels=pod` would shard metrics `metric{pod="foo"}` and `metric{pod="bar"}` to the same `-remoteWrite.url`
|
||||
by ignoring the `pod` label. This command-line flag allows specifying a comma-separated list of labels.
|
||||
|
||||
> Command-line flags `-remoteWrite.shardByURL.labels` and `-remoteWrite.shardByURL.ignoreLabels` are mutually exclusive.
|
||||
|
||||
See also [how to scrape a large number of targets](#scraping-big-number-of-targets).
|
||||
|
||||
|
||||
@@ -21,23 +21,9 @@ Recording rules results are persisted via remote write protocols and require `-r
|
||||
`vmalert` is heavily inspired by [Prometheus](https://prometheus.io/docs/alerting/latest/overview/)
|
||||
implementation and aims to be compatible with its syntax.
|
||||
|
||||
Configure `-vmalert.proxyURL` on VictoriaMetrics [single-node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmalert)
|
||||
or [vmselect in cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#vmalert)
|
||||
to proxy requests to `vmalert`. Proxying is needed for the following cases:
|
||||
|
||||
* to proxy requests from [Grafana Alerting UI](https://grafana.com/docs/grafana/latest/alerting/);
|
||||
* to access `vmalert`'s UI through [vmui](https://docs.victoriametrics.com/victoriametrics/#vmui).
|
||||
|
||||
[VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_vmalert_intro)
|
||||
provides out-of-the-box alerting functionality based on `vmalert`. This service simplifies the setup
|
||||
and management of alerting and recording rules as well as the integration with Alertmanager. For more details,
|
||||
please refer to the [VictoriaMetrics Cloud documentation](https://docs.victoriametrics.com/victoriametrics-cloud/alertmanager-setup-for-deployment/).
|
||||
|
||||
## Features
|
||||
|
||||
* Integration with [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) and [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/);
|
||||
* Integration with [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/) and [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/). See [this doc](https://docs.victoriametrics.com/victorialogs/vmalert/);
|
||||
* Integration with [VictoriaTraces](https://docs.victoriametrics.com/victoriatraces/) which also uses [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/). See [this doc](https://docs.victoriametrics.com/victoriatraces/vmalert/);
|
||||
* Integration with VictoriaMetrics, VictoriaLogs, VictoriaTraces, Graphite and Prometheus compatible storages. See [Integrations](https://docs.victoriametrics.com/victoriametrics/vmalert/#integrations) for details;
|
||||
* Prometheus [alerting rules definition format](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/#defining-alerting-rules)
|
||||
support;
|
||||
* Integration with [Alertmanager](https://github.com/prometheus/alertmanager) starting from [Alertmanager v0.16.0-alpha](https://github.com/prometheus/alertmanager/releases/tag/v0.16.0-alpha.0);
|
||||
@@ -62,8 +48,8 @@ please refer to the [VictoriaMetrics Cloud documentation](https://docs.victoriam
|
||||
|
||||
To start using `vmalert` you will need the following things:
|
||||
|
||||
* list of rules - PromQL/MetricsQL expressions to execute;
|
||||
* datasource address - reachable endpoint with [Prometheus HTTP API](https://prometheus.io/docs/prometheus/latest/querying/api/#http-api) support for running queries against;
|
||||
* list of rules - PromQL/MetricsQL/LogsQL/GraphiteQL expressions to execute;
|
||||
* datasource address - a storage that [vmalert integrates with](https://docs.victoriametrics.com/victoriametrics/vmalert/#integrations) for executing queries;
|
||||
* notifier address [optional] - reachable [Alert Manager](https://github.com/prometheus/alertmanager) instance for processing,
|
||||
aggregating alerts, and sending notifications. Please note, notifier address also supports Consul and DNS Service Discovery via
|
||||
[config file](https://docs.victoriametrics.com/victoriametrics/vmalert/#notifier-configuration-file).
|
||||
@@ -73,7 +59,7 @@ To start using `vmalert` you will need the following things:
|
||||
* remote read address [optional] - MetricsQL compatible datasource to restore alerts state from.
|
||||
|
||||
You can use the existing [docker-compose environment](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#victoriametrics-single-server)
|
||||
as example. It already contains vmalert configured with list of alerting rules and integrated with Alert Manager and VictoriaMetrics.
|
||||
as an example. It already contains vmalert configured with the list of alerting rules and integrated with Alert Manager and VictoriaMetrics.
|
||||
|
||||
Alternatively, build `vmalert` from sources:
|
||||
|
||||
@@ -87,7 +73,7 @@ Then run `vmalert`:
|
||||
|
||||
```sh
|
||||
./bin/vmalert -rule=alert.rules \ # Path to the file with rules configuration. Supports wildcard and HTTP URL (S3/GCS are available in Enterprise).
|
||||
-datasource.url=http://localhost:8428 \ # Prometheus HTTP API compatible datasource
|
||||
-datasource.url=http://localhost:8428 \ # VictoriaMetrics URL to query for rules evaluation. See other available Integrations above.
|
||||
-notifier.url=http://localhost:9093 \ # AlertManager URL (required if alerting rules are used)
|
||||
-notifier.url=http://127.0.0.1:9093 \ # AlertManager replica URL
|
||||
-remoteWrite.url=http://localhost:8428 \ # Remote write compatible storage to persist rules and alerts state info (required if recording rules are used)
|
||||
@@ -107,7 +93,7 @@ See also [stream aggregation](https://docs.victoriametrics.com/victoriametrics/s
|
||||
|
||||
See the full list of configuration flags in [configuration](#configuration) section.
|
||||
|
||||
If you run multiple `vmalert` services on the same datastore or AlertManager and need to distinguish the results or alerts,
|
||||
If you run multiple `vmalert` services for the same datasource or AlertManager and need to distinguish the results or alerts,
|
||||
specify different `-external.label` command-line flags to indicate which `vmalert` generated them.
|
||||
If rule result metrics have label that conflict with `-external.label`, `vmalert` will automatically rename
|
||||
it with prefix `exported_`.
|
||||
@@ -116,6 +102,7 @@ Configuration for [recording](https://prometheus.io/docs/prometheus/latest/confi
|
||||
and [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) rules is very
|
||||
similar to Prometheus rules and configured using YAML. Configuration examples may be found
|
||||
in [testdata](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/config/testdata) folder.
|
||||
|
||||
Every `rule` belongs to a `group` and every configuration file may contain arbitrary number of groups:
|
||||
|
||||
```yaml
|
||||
@@ -123,13 +110,7 @@ groups:
|
||||
[ - <rule_group> ]
|
||||
```
|
||||
|
||||
> Explore how to integrate `vmalert` with [VictoriaMetrics Anomaly Detection](https://docs.victoriametrics.com/anomaly-detection/) in the following [guide](https://docs.victoriametrics.com/anomaly-detection/guides/guide-vmanomaly-vmalert/).
|
||||
|
||||
> For users of [VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_vmalert_config),
|
||||
> many of the configuration steps (including highly available setup of `vmalert` for cluster deployments) are handled automatically.
|
||||
> Please, refer to the [VictoriaMetrics Cloud documentation](https://docs.victoriametrics.com/victoriametrics-cloud/alertmanager-setup-for-deployment/) for more details.
|
||||
|
||||
### Groups
|
||||
## Groups
|
||||
|
||||
Each group has the following attributes:
|
||||
|
||||
@@ -230,9 +211,11 @@ rules:
|
||||
|
||||
### Rules
|
||||
|
||||
Every rule contains `expr` field for [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/)
|
||||
or [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) expression. `vmalert` will execute the configured
|
||||
expression and then act according to the Rule type.
|
||||
Every rule contains an `expr` field for the expression to evaluate against the configured datasource.
|
||||
Depending on `group.type` value or `-rule.defaultRuleType` cmd-line flag expression can be one of the following types:
|
||||
- `prometheus` (default) - [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/) or [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) expression.
|
||||
- `vlogs` - [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/vmalert/) expression.
|
||||
- `graphite` - [Graphite](https://graphite.readthedocs.io/en/stable/render_api.html) expression.
|
||||
|
||||
There are two types of Rules:
|
||||
|
||||
@@ -244,8 +227,7 @@ There are two types of Rules:
|
||||
`-remoteWrite.url`. Recording rules are used to precompute frequently needed or computationally
|
||||
expensive expressions and save their result as a new set of time series ([Prometheus recording rules docs](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/)).
|
||||
|
||||
`vmalert` forbids defining duplicates - rules with the same combination of name, expression, and labels
|
||||
within one group.
|
||||
> `vmalert` forbids defining duplicates - rules with the same combination of name, expression and labels within one group.
|
||||
|
||||
#### Alerting rules
|
||||
|
||||
@@ -256,8 +238,8 @@ The syntax for alerting rule is the following:
|
||||
alert: <string>
|
||||
|
||||
# The expression to evaluate. The expression language depends on the type value.
|
||||
# By default, PromQL/MetricsQL expression is used. If group.type="graphite", then the expression
|
||||
# must contain valid Graphite expression.
|
||||
# By default, PromQL/MetricsQL expression is used. Other available types are "graphite" and "vlogs".
|
||||
# See https://docs.victoriametrics.com/victoriametrics/vmalert/#integrations
|
||||
expr: <string>
|
||||
|
||||
# Alerts are considered firing once they have been returned for this long.
|
||||
@@ -303,7 +285,44 @@ annotations:
|
||||
[ <labelname>: <tmpl_string> ]
|
||||
```
|
||||
|
||||
#### Templating
|
||||
#### Recording rules
|
||||
|
||||
The syntax for recording rules is the following:
|
||||
|
||||
```yaml
|
||||
# The name of the time series to output to. Must be a valid metric name.
|
||||
record: <string>
|
||||
|
||||
# The expression to evaluate. The expression language depends on the type value.
|
||||
# By default, PromQL/MetricsQL expression is used. Other available types are "graphite" and "vlogs".
|
||||
# See https://docs.victoriametrics.com/victoriametrics/vmalert/#integrations
|
||||
expr: <string>
|
||||
|
||||
# Labels to add or overwrite labels from other external label sources, such as group labels, before storing the result.
|
||||
#
|
||||
# In case of conflicts, original labels are kept with prefix `exported_`.
|
||||
# As a special case, specifying a label with an empty string value removes the label from the result if it exists
|
||||
# in the original query result; otherwise, it is ignored.
|
||||
#
|
||||
# Labels do not support templating in https://docs.victoriametrics.com/victoriametrics/vmalert/#templating due to cardinality concerns. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8171.
|
||||
labels:
|
||||
[ <labelname>: <labelvalue> ]
|
||||
|
||||
# Whether to print debug information into logs.
|
||||
# Information includes requests sent to the datasource.
|
||||
# information - it will be printed to logs.
|
||||
# Logs are printed with INFO level, so make sure that -loggerLevel=INFO to see the output.
|
||||
[ debug: <bool> | default = false ]
|
||||
|
||||
# Defines the number of rule updates entries stored in memory
|
||||
# and available for view on rule Details page.
|
||||
# Overrides `rule.updateEntriesLimit` value for this specific rule.
|
||||
[ update_entries_limit: <integer> | default 0 ]
|
||||
```
|
||||
|
||||
For recording rules to work `-remoteWrite.url` must be specified.
|
||||
|
||||
## Templating
|
||||
|
||||
It is allowed to use [Go templating](https://golang.org/pkg/text/template/) in annotations and labels(with limited support) to format data, iterate over
|
||||
or execute expressions.
|
||||
@@ -325,7 +344,7 @@ The following variables are available in templating:
|
||||
|
||||
Additionally, `vmalert` provides some extra templating functions listed in [template functions](#template-functions) and [reusable templates](#reusable-templates).
|
||||
|
||||
#### Template functions
|
||||
### Template functions
|
||||
|
||||
`vmalert` provides the following template functions, which can be used during [templating](#templating):
|
||||
|
||||
@@ -349,7 +368,7 @@ Additionally, `vmalert` provides some extra templating functions listed in [temp
|
||||
* `parseDurationTime` - parses the input string into [time.Duration](https://pkg.go.dev/time#Duration).
|
||||
* `pathEscape` - escapes the input string, so it can be safely put inside path part of URL.
|
||||
* `pathPrefix` - returns the path part of the `-external.url` command-line flag.
|
||||
* `query` - executes the [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) query against `-datasource.url` and returns the query result.
|
||||
* `query` - executes query against `-datasource.url` and returns the query result.
|
||||
For example, `{{ query "sort_desc(process_resident_memory_bytes)" | first | value }}` executes the `sort_desc(process_resident_memory_bytes)`
|
||||
query at `-datasource.url` and returns the first result.
|
||||
* `queryEscape` - escapes the input string, so it can be safely put inside [query arg](https://en.wikipedia.org/wiki/Percent-encoding) part of URL.
|
||||
@@ -369,7 +388,7 @@ Additionally, `vmalert` provides some extra templating functions listed in [temp
|
||||
* `toUpper` - converts all the chars in the input string to uppercase.
|
||||
* `value` - returns the numeric value from the input query result.
|
||||
|
||||
#### Reusable templates
|
||||
### Reusable templates
|
||||
|
||||
Like in Alertmanager you can define [reusable templates](https://prometheus.io/docs/prometheus/latest/configuration/template_examples/#defining-reusable-templates)
|
||||
to share same templates across annotations. Just define the templates in a file and
|
||||
@@ -407,44 +426,8 @@ groups:
|
||||
The `-rule.templates` flag supports wildcards so multiple files with templates can be loaded.
|
||||
The content of `-rule.templates` can be also [hot reloaded](#hot-config-reload).
|
||||
|
||||
#### Recording rules
|
||||
|
||||
The syntax for recording rules is following:
|
||||
|
||||
```yaml
|
||||
# The name of the time series to output to. Must be a valid metric name.
|
||||
record: <string>
|
||||
|
||||
# The expression to evaluate. The expression language depends on the type value.
|
||||
# By default, MetricsQL expression is used. If group.type="graphite", then the expression
|
||||
# must contain valid Graphite expression.
|
||||
expr: <string>
|
||||
|
||||
# Labels to add or overwrite labels from other external label sources, such as group labels, before storing the result.
|
||||
#
|
||||
# In case of conflicts, original labels are kept with prefix `exported_`.
|
||||
# As a special case, specifying a label with an empty string value removes the label from the result if it exists
|
||||
# in the original query result; otherwise, it is ignored.
|
||||
#
|
||||
# Labels do not support templating in https://docs.victoriametrics.com/victoriametrics/vmalert/#templating due to cardinality concerns. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8171.
|
||||
labels:
|
||||
[ <labelname>: <labelvalue> ]
|
||||
|
||||
# Whether to print debug information into logs.
|
||||
# Information includes requests sent to the datasource.
|
||||
# information - it will be printed to logs.
|
||||
# Logs are printed with INFO level, so make sure that -loggerLevel=INFO to see the output.
|
||||
[ debug: <bool> | default = false ]
|
||||
|
||||
# Defines the number of rule's updates entries stored in memory
|
||||
# and available for view on rule's Details page.
|
||||
# Overrides `rule.updateEntriesLimit` value for this specific rule.
|
||||
[ update_entries_limit: <integer> | default 0 ]
|
||||
```
|
||||
|
||||
For recording rules to work `-remoteWrite.url` must be specified.
|
||||
|
||||
### Alerts state on restarts
|
||||
## Alerts state on restarts
|
||||
|
||||
`vmalert` holds alerts state in the memory. Restart of the `vmalert` process will reset the state of all active alerts
|
||||
in the memory. To prevent `vmalert` from losing the state on restarts configure it to persist the state
|
||||
@@ -465,7 +448,7 @@ in configured `-remoteRead.url`, weren't updated in the last `1h` (controlled by
|
||||
or received state doesn't match current `vmalert` rules configuration. `vmalert` marks successfully restored rules
|
||||
with `restored` label in [web UI](#web).
|
||||
|
||||
### Link to alert source
|
||||
## Link to alert source
|
||||
|
||||
Alerting notifications sent by vmalert always contain a `source` link. By default, the link format
|
||||
is the following `http://<vmalert-addr>/vmalert/alert?group_id=<group_id>&alert_id=<alert_id>`. On click, it opens
|
||||
@@ -501,7 +484,9 @@ In addition to `source` link, some extra links could be added to alert's [annota
|
||||
field. See [how we use them](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/9751ea10983d42068487624849cac7ad6fd7e1d8/deployment/docker/rules/alerts-cluster.yml#L44)
|
||||
to link alerting rule and the corresponding panel on Grafana dashboard.
|
||||
|
||||
### Multitenancy
|
||||
## Multitenancy
|
||||
|
||||
> See how to use [multitenancy in rules for VictoriaLogs](https://docs.victoriametrics.com/victorialogs/vmalert/#how-to-use-multitenancy-in-rules).
|
||||
|
||||
There are the following approaches exist for alerting and recording rules across
|
||||
[multiple tenants](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy):
|
||||
@@ -559,7 +544,7 @@ The enterprise version of vmalert is available in `vmutils-*-enterprise.tar.gz`
|
||||
at [release page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) and in `*-enterprise`
|
||||
tags at [Docker Hub](https://hub.docker.com/r/victoriametrics/vmalert/tags) and [Quay](https://quay.io/repository/victoriametrics/vmalert?tab=tags).
|
||||
|
||||
### Reading rules from object storage
|
||||
## Reading rules from object storage
|
||||
|
||||
[Enterprise version](https://docs.victoriametrics.com/victoriametrics/enterprise/) of `vmalert` may read alerting and recording rules
|
||||
from object storage:
|
||||
@@ -578,7 +563,7 @@ The following [command-line flags](#flags) can be used for fine-tuning access to
|
||||
* `-s3.customEndpoint` - custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set.
|
||||
* `-s3.forcePathStyle` - prefixing endpoint with bucket name when set false, true by default.
|
||||
|
||||
### Topology examples
|
||||
## Topology examples
|
||||
|
||||
The following sections are showing how `vmalert` may be used and configured
|
||||
for different scenarios.
|
||||
@@ -589,7 +574,7 @@ Please note, not all flags in examples are required:
|
||||
you have recording rules or want to store [alerts state](#alerts-state-on-restarts) on `vmalert` restarts;
|
||||
* `-notifier.url` is optional and is needed only if you have alerting rules.
|
||||
|
||||
#### Single-node VictoriaMetrics
|
||||
### Single-node VictoriaMetrics
|
||||
|
||||
The simplest configuration where one single-node VM server is used for
|
||||
rules execution, storing recording rules results and alerts state.
|
||||
@@ -607,7 +592,7 @@ rules execution, storing recording rules results and alerts state.
|
||||

|
||||
{width="500"}
|
||||
|
||||
#### Cluster VictoriaMetrics
|
||||
### Cluster VictoriaMetrics
|
||||
|
||||
In [cluster mode](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/)
|
||||
VictoriaMetrics has separate components for writing and reading path:
|
||||
@@ -630,7 +615,7 @@ Cluster mode could have multiple `vminsert` and `vmselect` components.
|
||||
In case when you want to spread the load on these components - add balancers before them and configure
|
||||
`vmalert` with balancer addresses. Please, see more about [VictoriaMetrics cluster architecture](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#architecture-overview).
|
||||
|
||||
#### HA vmalert
|
||||
### HA vmalert
|
||||
|
||||
For High Availability(HA) user can run multiple identically configured `vmalert` instances.
|
||||
It means all of them will execute the same rules, write state and results to
|
||||
@@ -674,12 +659,12 @@ to ensure [high availability](https://github.com/prometheus/alertmanager#high-av
|
||||
This example uses single-node VM server for the sake of simplicity.
|
||||
Check how to replace it with [cluster VictoriaMetrics](#cluster-victoriametrics) if needed.
|
||||
|
||||
#### Downsampling and aggregation via vmalert
|
||||
### Downsampling and aggregation via vmalert
|
||||
|
||||
_Please note, [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/) might be more efficient
|
||||
for cases when downsampling or aggregation need to be applied **before data gets into the TSDB.**_
|
||||
|
||||
`vmalert` can't modify existing data. But it can run arbitrary PromQL/MetricsQL queries
|
||||
`vmalert` can't modify existing data. But it can run arbitrary queries
|
||||
via [recording rules](#recording-rules) and backfill results to the configured `-remoteWrite.url`.
|
||||
This ability allows to aggregate data. For example, the following rule will calculate the average value for
|
||||
metric `http_requests` on the `5m` interval:
|
||||
@@ -732,7 +717,7 @@ Flags `-remoteRead.url` and `-notifier.url` are omitted since we assume only rec
|
||||
|
||||
See also [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/) and [downsampling](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#downsampling).
|
||||
|
||||
#### Multiple remote writes
|
||||
### Multiple remote writes
|
||||
|
||||
For persisting recording or alerting rule results `vmalert` requires `-remoteWrite.url` to be set.
|
||||
But this flag supports only one destination. To persist rule results to multiple destinations
|
||||
@@ -746,7 +731,72 @@ Using `vmagent` as a proxy provides additional benefits such as
|
||||
[data persisting when storage is unreachable](https://docs.victoriametrics.com/victoriametrics/vmagent/#replication-and-high-availability),
|
||||
or time series modification via [relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/).
|
||||
|
||||
### Web
|
||||
## Integrations
|
||||
|
||||
vmalert can be integrated with different data sources for alerting and recording rules. But it deliberately allows
|
||||
configuring only one `datasource.url`. We recommend running separate instances of vmalert for each datasource type
|
||||
with the specified `-rule.defaultRuleType=<datasource_type>` command-line flag.
|
||||
|
||||
###### VictoriaMetrics
|
||||
|
||||
vmalert natively integrates with [VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/) for alerting and
|
||||
recording rules.
|
||||
|
||||
###### VictoriaLogs
|
||||
|
||||
vmalert integrates with [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/) and allows configuring alerting and recording rules using [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/).
|
||||
Results of recording rules and alerting state should be persisted to the remote-write compatible storage, such as VictoriaMetrics.
|
||||
To enable VictoriaLogs compatibility set the `-rule.defaultRuleType=vlogs` command-line flag.
|
||||
|
||||
See [this doc](https://docs.victoriametrics.com/victorialogs/vmalert/) for details.
|
||||
|
||||
###### VictoriaTraces
|
||||
|
||||
vmalert integrates with [VictoriaTraces](https://docs.victoriametrics.com/victoriatraces/) in exactly the same way as
|
||||
with [VictoriaLogs](https://docs.victoriametrics.com/victoriametrics/vmalert/#victorialogs).
|
||||
|
||||
###### Graphite
|
||||
|
||||
vmalert integrates with [Graphite Render API](https://graphite.readthedocs.io/en/stable/render_api.html) and allows configuring alerting and recording rules.
|
||||
During evaluation, vmalert will send requests to `<-datasource.url>/render?format=json`.
|
||||
To enable Graphite compatibility set the `-rule.defaultRuleType=graphite` command-line flag.
|
||||
|
||||
Since VictoriaMetrics supports both Graphite and Prometheus APIs, it is possible to mix Graphite and VictoriaMetrics rules.
|
||||
On the group level, set the `type` field to specify to which datasource type it should belong: `prometheus` (MetricsQL) or `graphite` (GraphiteQL).
|
||||
When using vmalert with both `graphite` and `prometheus` rules configured against the cluster version of VictoriaMetrics, don't forget
|
||||
to set the `-datasource.appendTypePrefix` flag to `true`, so vmalert can adjust URL prefix automatically based on the query type.
|
||||
|
||||
###### Prometheus
|
||||
|
||||
vmalert uses [Prometheus HTTP API](https://prometheus.io/docs/prometheus/latest/querying/api/#http-api) for querying
|
||||
and [Prometheus Remote Write v1 protocol](https://prometheus.io/docs/specs/prw/remote_write_spec/) for persisting
|
||||
recording rules results and alerting state. Hence, it can be integrated with any Prometheus-compatible storage
|
||||
that supports these protocols.
|
||||
|
||||
###### Grafana
|
||||
|
||||
To proxy requests from [Grafana Alerting UI](https://grafana.com/docs/grafana/latest/alerting/) configure `-vmalert.proxyURL`
|
||||
on VictoriaMetrics [single-node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmalert)
|
||||
or [vmselect in cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#vmalert).
|
||||
|
||||
###### vmui
|
||||
|
||||
To access rules UI through [vmui](https://docs.victoriametrics.com/victoriametrics/#vmui) configure `-vmalert.proxyURL`
|
||||
on VictoriaMetrics [single-node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmalert)
|
||||
or [vmselect in cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#vmalert).
|
||||
|
||||
###### vmanomaly
|
||||
|
||||
See how to integrate vmalert with [VictoriaMetrics Anomaly Detection](https://docs.victoriametrics.com/anomaly-detection/)
|
||||
in the following [guide](https://docs.victoriametrics.com/anomaly-detection/guides/guide-vmanomaly-vmalert/).
|
||||
|
||||
###### VictoriaMetrics Cloud
|
||||
|
||||
For users of [VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_vmalert_config),
|
||||
many of the configuration steps (including highly available setup of `vmalert` for cluster deployments) are handled automatically.
|
||||
Please refer to the [VictoriaMetrics Cloud documentation](https://docs.victoriametrics.com/victoriametrics-cloud/alertmanager-setup-for-deployment/) for more details.
|
||||
|
||||
## Web
|
||||
|
||||
`vmalert` runs a web-server (`-httpListenAddr`) for serving metrics and alerts endpoints:
|
||||
|
||||
@@ -771,28 +821,11 @@ This may be used for better integration with Grafana unified alerting system. Se
|
||||
* [How to query vmalert from single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmalert)
|
||||
* [How to query vmalert from VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#vmalert)
|
||||
|
||||
## Graphite
|
||||
|
||||
vmalert sends requests to `<-datasource.url>/render?format=json` during evaluation of alerting and recording rules
|
||||
if the corresponding group or rule contains `type: "graphite"` config option. It is expected that the `<-datasource.url>/render`
|
||||
implements [Graphite Render API](https://graphite.readthedocs.io/en/stable/render_api.html) for `format=json`.
|
||||
When using vmalert with both `graphite` and `prometheus` rules configured against cluster version of VM do not forget
|
||||
to set `-datasource.appendTypePrefix` flag to `true`, so vmalert can adjust URL prefix automatically based on the query type.
|
||||
|
||||
## VictoriaLogs
|
||||
|
||||
vmalert supports [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/) as a datasource for writing alerting and recording rules using [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/). See [this doc](https://docs.victoriametrics.com/victorialogs/vmalert/) for details.
|
||||
|
||||
## VictoriaTraces
|
||||
|
||||
vmalert supports [VictoriaTraces](https://docs.victoriametrics.com/victoriatraces/) as a (`vlogs`) datasource for writing alerting and recording rules using [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/). See [this doc](https://docs.victoriametrics.com/victoriatraces/vmalert/) for details.
|
||||
|
||||
## Rules backfilling
|
||||
|
||||
vmalert supports alerting and recording rules backfilling (aka `replay`). In replay mode vmalert
|
||||
can read the same rules configuration as normal, evaluate them on the given time range and backfill
|
||||
results via remote write to the configured storage. vmalert supports any PromQL/MetricsQL compatible
|
||||
data source for backfilling.
|
||||
results via remote write to the configured storage. vmalert supports only the `prometheus` datasource type for backfilling.
|
||||
|
||||
Please note, that response caching may lead to unexpected results during and after backfilling process.
|
||||
In order to avoid this you need to reset cache contents or disable caching when using backfilling
|
||||
@@ -852,13 +885,9 @@ vmalert respects `evaluationInterval` value set by flag or per-group during the
|
||||
vmalert automatically disables caching on VictoriaMetrics side by sending `nocache=1` param. It allows
|
||||
to prevent cache pollution and unwanted time range boundaries adjustment during backfilling.
|
||||
|
||||
#### Recording rules
|
||||
Results of recording rules `replay` should match the results of normal rules evaluation.
|
||||
|
||||
The result of recording rules `replay` should match with results of normal rules evaluation.
|
||||
|
||||
#### Alerting rules
|
||||
|
||||
The result of alerting rules `replay` is time series reflecting [alert's state](#alerts-state-on-restarts).
|
||||
Results of alerting rules `replay` are the time series reflecting the [state of the alert](#alerts-state-on-restarts).
|
||||
To see if `replayed` alert has fired in the past use the following PromQL/MetricsQL expression:
|
||||
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user