mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-06-07 19:06:17 +03:00
Compare commits
85 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8905bc2a40 | ||
|
|
f9847352b4 | ||
|
|
d1a9d8aa1c | ||
|
|
b93b01bc6d | ||
|
|
dbd8beccfa | ||
|
|
6b23df2bec | ||
|
|
619d4959c7 | ||
|
|
70ea4e28a7 | ||
|
|
74a2943030 | ||
|
|
b3ec0fb5e2 | ||
|
|
9a3afea123 | ||
|
|
7705c19720 | ||
|
|
80c18b7275 | ||
|
|
cf87b810b7 | ||
|
|
538fdfe133 | ||
|
|
f52769f6ee | ||
|
|
e6a782498a | ||
|
|
a441cdd1d9 | ||
|
|
d0f08b4a58 | ||
|
|
aae8064285 | ||
|
|
7e173655ba | ||
|
|
92212f04da | ||
|
|
de60ad0cd6 | ||
|
|
7a8ef517ae | ||
|
|
d61bac9fd9 | ||
|
|
eac3da478e | ||
|
|
0890c780c2 | ||
|
|
fd1a6ce9ae | ||
|
|
9b90c841c6 | ||
|
|
93c87d28f6 | ||
|
|
23c55181ef | ||
|
|
b19ca3eb5f | ||
|
|
4e850cd6a7 | ||
|
|
8cb35974af | ||
|
|
a0380a0a91 | ||
|
|
3a68c47de0 | ||
|
|
697b6af10f | ||
|
|
93267f143f | ||
|
|
3412d5d138 | ||
|
|
27f7cca7ff | ||
|
|
cf38c3c62f | ||
|
|
c56b66210f | ||
|
|
82ffbcb9a6 | ||
|
|
82ccdfaa91 | ||
|
|
ab8f5545bc | ||
|
|
0eacea1de1 | ||
|
|
737d641920 | ||
|
|
4fc33163c4 | ||
|
|
f9f3afb6af | ||
|
|
8b32e7c3a0 | ||
|
|
1573ececb2 | ||
|
|
a249cd9d22 | ||
|
|
ef0e37cb9e | ||
|
|
0afd48d2ee | ||
|
|
42866fa754 | ||
|
|
827a3a7866 | ||
|
|
606585f7be | ||
|
|
21598ac417 | ||
|
|
894e5d2b9b | ||
|
|
415b1ddfb5 | ||
|
|
db7dd96346 | ||
|
|
ba48438b06 | ||
|
|
7882a0dbbf | ||
|
|
4fe67504f9 | ||
|
|
96e001d254 | ||
|
|
faf92a0965 | ||
|
|
a6f16dcc11 | ||
|
|
cc311e20fe | ||
|
|
574289c3fb | ||
|
|
0a134ace63 | ||
|
|
8300cc17af | ||
|
|
6273385618 | ||
|
|
3232605524 | ||
|
|
18834a9191 | ||
|
|
d90dc1fbf9 | ||
|
|
dbd0c552d5 | ||
|
|
cc00a2c453 | ||
|
|
ce2107bc52 | ||
|
|
12a1a71cc1 | ||
|
|
de113806bb | ||
|
|
8c8ff5d0cb | ||
|
|
9e8733ff65 | ||
|
|
5bc5d6a1f2 | ||
|
|
baedb25936 | ||
|
|
6909d845b0 |
2
.github/workflows/main.yml
vendored
2
.github/workflows/main.yml
vendored
@@ -24,7 +24,7 @@ jobs:
|
||||
run: |
|
||||
go get -u golang.org/x/lint/golint
|
||||
go get -u github.com/kisielk/errcheck
|
||||
go get -u github.com/golangci/golangci-lint/cmd/golangci-lint
|
||||
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.27.0
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@master
|
||||
- name: Build
|
||||
|
||||
56
README.md
56
README.md
@@ -10,16 +10,21 @@
|
||||
|
||||
## VictoriaMetrics
|
||||
|
||||
VictoriaMetrics is fast, cost-effective and scalable time-series database. It can be used as long-term remote storage for Prometheus.
|
||||
VictoriaMetrics is fast, cost-effective and scalable time-series database.
|
||||
|
||||
It is available in [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases),
|
||||
[docker images](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and
|
||||
in [source code](https://github.com/VictoriaMetrics/VictoriaMetrics). Just download VictoriaMetrics and see [how to start it](#how-to-start-victoriametrics).
|
||||
|
||||
Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
|
||||
|
||||
[Contact us](mailto:info@victoriametrics.com) if you need paid enterprise support for VictoriaMetrics.
|
||||
|
||||
|
||||
## Case studies and talks
|
||||
|
||||
* [Adidas](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#adidas)
|
||||
* [CERN](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#cern)
|
||||
* [COLOPL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#colopl)
|
||||
* [Wix.com](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#wixcom)
|
||||
* [Wedos.com](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#wedoscom)
|
||||
@@ -33,6 +38,8 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
|
||||
|
||||
## Prominent features
|
||||
|
||||
* VictoriaMetrics can be used as long-term storage for Prometheus or for [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md).
|
||||
See [these docs](#prometheus-setup) for details.
|
||||
* Supports [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/), so it can be used as Prometheus drop-in replacement in Grafana.
|
||||
VictoriaMetrics implements [MetricsQL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/MetricsQL) query language, which is inspired by PromQL.
|
||||
* Supports global query view. Multiple Prometheus instances may write data into VictoriaMetrics. Later this data may be used in a single query.
|
||||
@@ -115,6 +122,8 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
|
||||
* [Monitoring](#monitoring)
|
||||
* [Troubleshooting](#troubleshooting)
|
||||
* [Backfilling](#backfilling)
|
||||
* [Replication](#replication)
|
||||
* [Backups](#backups)
|
||||
* [Profiling](#profiling)
|
||||
* [Integrations](#integrations)
|
||||
* [Third-party contributions](#third-party-contributions)
|
||||
@@ -139,6 +148,8 @@ The following command-line flags are used the most:
|
||||
* `-retentionPeriod` - retention period in months for the data. Older data is automatically deleted. Default period is 1 month.
|
||||
* `-httpListenAddr` - TCP address to listen to for http requests. By default, it listens port `8428` on all the network interfaces.
|
||||
|
||||
Other flags have good enough default values, so set them only if you really need this.
|
||||
|
||||
Pass `-help` to see all the available flags with description and default values.
|
||||
|
||||
It is recommended setting up [monitoring](#monitoring) for VictoriaMetrics.
|
||||
@@ -779,6 +790,8 @@ remote_write:
|
||||
kill -HUP `pidof prometheus`
|
||||
```
|
||||
|
||||
It is recommended to use [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md) instead of Prometheus for highly loaded setups.
|
||||
|
||||
4) Now Prometheus should write data into all the configured `remote_write` urls in parallel.
|
||||
5) Set up [Promxy](https://github.com/jacksontj/promxy) in front of all the VictoriaMetrics replicas.
|
||||
6) Set up Prometheus datasource in Grafana that points to Promxy.
|
||||
@@ -789,6 +802,7 @@ to write data to `victoriametrics-addr-1`, while each `r2` should write data to
|
||||
Another option is to write data simultaneously from Prometheus HA pair to a pair of VictoriaMetrics instances
|
||||
with the enabled de-duplication. See [this section](#deduplication) for details.
|
||||
|
||||
|
||||
### Deduplication
|
||||
|
||||
VictoriaMetrics de-duplicates data points if `-dedup.minScrapeInterval` command-line flag
|
||||
@@ -890,7 +904,8 @@ mkfs.ext4 ... -O 64bit,huge_file,extent -T huge
|
||||
### Monitoring
|
||||
|
||||
VictoriaMetrics exports internal metrics in Prometheus format at `/metrics` page.
|
||||
These metrics may be collected via Prometheus by adding the corresponding scrape config to it.
|
||||
These metrics may be collected by [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md)
|
||||
or Prometheus by adding the corresponding scrape config to it.
|
||||
Alternatively they can be self-scraped by setting `-selfScrapeInterval` command-line flag to duration greater than 0.
|
||||
For example, `-selfScrapeInterval=10s` would enable self-scraping of `/metrics` page with 10 seconds interval.
|
||||
|
||||
@@ -901,13 +916,17 @@ The most interesting metrics are:
|
||||
|
||||
* `vm_cache_entries{type="storage/hour_metric_ids"}` - the number of time series with new data points during the last hour
|
||||
aka active time series.
|
||||
* `rate(vm_new_timeseries_created_total[5m])` - time series churn rate.
|
||||
* `vm_rows{type="indexdb"}` - the number of rows in inverted index. High value for this number usually mean high churn rate for time series.
|
||||
* Sum of `vm_rows{type="storage/big"}` and `vm_rows{type="storage/small"}` - total number of `(timestamp, value)` data points
|
||||
in the database.
|
||||
* `vm_rows_inserted_total` - the total number of inserted rows since VictoriaMetrics start.
|
||||
* `increase(vm_new_timeseries_created_total[1h])` - time series churn rate during the previous hour.
|
||||
* `sum(vm_rows{type=~"storage/.*"})` - total number of `(timestamp, value)` data points in the database.
|
||||
* `sum(rate(vm_rows_inserted_total[5m]))` - ingestion rate, i.e. how many samples are inserted int the database per second.
|
||||
* `vm_free_disk_space_bytes` - free space left at `-storageDataPath`.
|
||||
* `sum(vm_data_size_bytes)` - the total data size on disk.
|
||||
* `sum(vm_data_size_bytes)` - the total size of data on disk.
|
||||
* `increase(vm_slow_row_inserts_total[5m])` - the number of slow inserts during the last 5 minutes.
|
||||
If this number remains high during extended periods of time, then it is likely more RAM is needed for optimal handling
|
||||
of the current number of active time series.
|
||||
* `increase(vm_slow_metric_name_loads_total[5m])` - the number of slow loads of metric names during the last 5 minutes.
|
||||
If this number remains high during extended periods of time, then it is likely more RAM is needed for optimal handling
|
||||
of the current number of active time series.
|
||||
|
||||
|
||||
### Troubleshooting
|
||||
@@ -920,8 +939,9 @@ The most interesting metrics are:
|
||||
|
||||
* If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second,
|
||||
then it is likely you have too many active time series for the current amount of RAM.
|
||||
VictoriaMetrics [exposes](#monitoring) `vm_slow_*` metrics, which could be used as an indicator of low amounts of RAM.
|
||||
It is recommended increasing the amount of RAM on the node with VictoriaMetrics in order to improve
|
||||
ingestion performance.
|
||||
ingestion and query performance in this case.
|
||||
Another option is to increase `-memory.allowedPercent` command-line flag value. Be careful with this
|
||||
option, since too big value for `-memory.allowedPercent` may result in high I/O usage.
|
||||
|
||||
@@ -968,6 +988,24 @@ the query cache, which could contain incomplete data cached during the backfilli
|
||||
Yet another solution is to increase `-search.cacheTimestampOffset` flag value in order to disable caching
|
||||
for data with timestamps close to the current time.
|
||||
|
||||
|
||||
### Replication
|
||||
|
||||
VictoriaMetrics relies on replicated durable persistent storage such as [Google Cloud disks](https://cloud.google.com/compute/docs/disks#pdspecs)
|
||||
or [Amazon EBS](https://aws.amazon.com/ebs/). It is also recommended making periodic backups,
|
||||
since [replication doesn't save from disaster](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883).
|
||||
See [backup docs](#backups) for details.
|
||||
|
||||
See also [high availability docs](#high-availability) and [docs about cluster version of VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md).
|
||||
|
||||
|
||||
### Backups
|
||||
|
||||
VictoriaMetrics supports backups via [vmbackup](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmbackup/README.md)
|
||||
and [vmrestore](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmrestore/README.md) tools.
|
||||
We also provide provide `vmbackuper` tool for paid enterprise subscribers - see [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/466) for details.
|
||||
|
||||
|
||||
### Profiling
|
||||
|
||||
VictoriaMetrics provides handlers for collecting the following [Go profiles](https://blog.golang.org/profiling-go-programs):
|
||||
|
||||
@@ -3,6 +3,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"net/http"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert"
|
||||
@@ -25,6 +26,8 @@ var (
|
||||
)
|
||||
|
||||
func main() {
|
||||
// Write flags and help message to stdout, since it is easier to grep or pipe.
|
||||
flag.CommandLine.SetOutput(os.Stdout)
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
|
||||
@@ -52,8 +52,9 @@ publish-vmagent:
|
||||
APP_NAME=vmagent $(MAKE) publish-via-docker
|
||||
|
||||
run-vmagent:
|
||||
mkdir -p vmagent-data
|
||||
DOCKER_OPTS='-v $(shell pwd)/vmagent-data:/vmagent-data' \
|
||||
mkdir -p vmagent-remotewrite-data
|
||||
DOCKER_OPTS='-v $(shell pwd)/vmagent-remotewrite-data:/vmagent-remotewrite-data' \
|
||||
ARGS='-remoteWrite.url=http://localhost:8428/api/v1/write' \
|
||||
APP_NAME=vmagent \
|
||||
$(MAKE) run-via-docker
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
## vmagent
|
||||
|
||||
`vmagent` is a tiny but brave agent, which helps you collecting metrics from various sources
|
||||
and storing them to [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
|
||||
or any other Prometheus-compatible storage system that supports `remote_write` protocol.
|
||||
`vmagent` is a tiny but brave agent, which helps you collect metrics from various sources
|
||||
and stores them in [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
|
||||
or any other Prometheus-compatible storage system that supports the `remote_write` protocol.
|
||||
|
||||
<img alt="vmagent" src="vmagent.png">
|
||||
|
||||
@@ -11,7 +11,7 @@ or any other Prometheus-compatible storage system that supports `remote_write` p
|
||||
|
||||
While VictoriaMetrics provides an efficient solution to store and observe metrics, our users needed something fast
|
||||
and RAM friendly to scrape metrics from Prometheus-compatible exporters to VictoriaMetrics.
|
||||
Also, we found that users’ infrastructure is like snowflakes - never alike, and we decided to add more flexibility
|
||||
Also, we found that users’ infrastructure are snowflakes - no two are alike, and we decided to add more flexibility
|
||||
to `vmagent` (like the ability to push metrics instead of pulling them). We did our best and plan to do even more.
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ to `vmagent` (like the ability to push metrics instead of pulling them). We did
|
||||
* Works in environments with unstable connections to remote storage. If the remote storage is unavailable, the collected metrics
|
||||
are buffered at `-remoteWrite.tmpDataPath`. The buffered metrics are sent to remote storage as soon as connection
|
||||
to remote storage is recovered. The maximum disk usage for the buffer can be limited with `-remoteWrite.maxDiskUsagePerURL`.
|
||||
* Uses lower amounts of RAM, CPU, disk IO and network bandwidth comparing to Prometheus.
|
||||
* Uses lower amounts of RAM, CPU, disk IO and network bandwidth compared to Prometheus.
|
||||
|
||||
|
||||
### Quick Start
|
||||
@@ -40,8 +40,7 @@ Just download `vmutils-*` archive from [releases page](https://github.com/Victor
|
||||
and pass the following flags to `vmagent` binary in order to start scraping Prometheus targets:
|
||||
|
||||
* `-promscrape.config` with the path to Prometheus config file (it is usually located at `/etc/prometheus/prometheus.yml`)
|
||||
* `-remoteWrite.url` with the remote storage endpoint such as VictoriaMetrics. Multiple `-remoteWrite.url` args can be set in parallel
|
||||
in order to replicate data concurrently to multiple remote storage systems.
|
||||
* `-remoteWrite.url` with the remote storage endpoint such as VictoriaMetrics. The `-remoteWrite.url` argument can be specified multiple times in order to replicate data concurrently to an arbitrary amount of remote storage systems.
|
||||
|
||||
Example command line:
|
||||
|
||||
@@ -49,7 +48,7 @@ Example command line:
|
||||
/path/to/vmagent -promscrape.config=/path/to/prometheus.yml -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
|
||||
```
|
||||
|
||||
If you need collecting only Influx data, then the following command line would be enough:
|
||||
If you only need to collect Influx data, then the following is sufficient:
|
||||
|
||||
```
|
||||
/path/to/vmagent -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
|
||||
@@ -79,14 +78,14 @@ See [the corresponding Makefile rules](https://github.com/VictoriaMetrics/Victor
|
||||
#### Drop-in replacement for Prometheus
|
||||
|
||||
If you use Prometheus only for scraping metrics from various targets and forwarding these metrics to remote storage,
|
||||
then `vmagent` can replace such Prometheus setup. Usually `vmagent` requires lower amounts of RAM, CPU and network bandwidth comparing to Prometheus for such setup.
|
||||
then `vmagent` can replace such Prometheus setup. Usually `vmagent` requires lower amounts of RAM, CPU and network bandwidth comparing to Prometheus for such a setup.
|
||||
See [these docs](#how-to-collect-metrics-in-prometheus-format) for details.
|
||||
|
||||
|
||||
#### Replication and high availability
|
||||
|
||||
`vmagent` replicates the collected metrics among multiple remote storage instances configured via `-remoteWrite.url` args.
|
||||
If a single remote storage instance temporarily goes out of service, then the collected data remains available in another remote storage instances.
|
||||
If a single remote storage instance temporarily is out of service, then the collected data remains available in another remote storage instances.
|
||||
`vmagent` buffers the collected data in files at `-remoteWrite.tmpDataPath` until the remote storage becomes available again.
|
||||
Then it sends the buffered data to the remote storage in order to prevent data gaps in the remote storage.
|
||||
|
||||
@@ -94,13 +93,13 @@ Then it sends the buffered data to the remote storage in order to prevent data g
|
||||
#### Relabeling and filtering
|
||||
|
||||
`vmagent` can add, remove or update labels on the collected data before sending it to remote storage. Additionally,
|
||||
it can remove unneeded samples via Prometheus-like relabeling before sending the collected data to remote storage.
|
||||
it can remove unwanted samples via Prometheus-like relabeling before sending the collected data to remote storage.
|
||||
See [these docs](#relabeling) for details.
|
||||
|
||||
|
||||
#### Splitting data streams among multiple systems
|
||||
|
||||
`vmagent` supports splitting of the collected data among muliple destinations with the help of `-remoteWrite.urlRelabelConfig`,
|
||||
`vmagent` supports splitting the collected data between muliple destinations with the help of `-remoteWrite.urlRelabelConfig`,
|
||||
which is applied independently for each configured `-remoteWrite.url` destination. For instance, it is possible to replicate or split
|
||||
data among long-term remote storage, short-term remote storage and real-time analytical system [built on top of Kafka](https://github.com/Telefonica/prometheus-kafka-adapter).
|
||||
Note that each destination can receive its own subset of the collected data thanks to per-destination relabeling via `-remoteWrite.urlRelabelConfig`.
|
||||
@@ -148,6 +147,10 @@ The following scrape types in [scrape_config](https://prometheus.io/docs/prometh
|
||||
* `dns_sd_configs` - for scraping targets discovered from DNS records (SRV, A and AAAA).
|
||||
See [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config) for details.
|
||||
|
||||
Note that `vmagent` doesn't support `refresh_interval` option these scrape configs. Use the corresponding `-promscrape.*CheckInterval`
|
||||
command-line flag instead. For example, `-promscrape.consulSDCheckInterval=60s` sets `refresh_interval` for all the `consul_sd_configs`
|
||||
entries to 60s. Run `vmagent -help` in order to see default values for `-promscrape.*CheckInterval` flags.
|
||||
|
||||
|
||||
File feature requests at [our issue tracker](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you need other service discovery mechanisms to be supported by `vmagent`.
|
||||
|
||||
@@ -200,12 +203,12 @@ either via `vmagent` itself or via Prometheus, so the exported metrics could be
|
||||
* When `vmagent` scrapes many unreliable targets, it can flood error log with scrape errors. These errors can be suppressed
|
||||
by passing `-promscrape.suppressScrapeErrors` command-line flag to `vmagent`. The most recent scrape error per each target can be observed at `http://vmagent-host:8429/targets`.
|
||||
|
||||
* It is recommended increasing `-remoteWrite.queues` if `vmagent` collects more than 100K samples per second
|
||||
* It is recommended to increase `-remoteWrite.queues` if `vmagent` collects more than 100K samples per second
|
||||
and `vmagent_remotewrite_pending_data_bytes` metric exported at `http://vmagent-host:8429/metrics` page constantly grows.
|
||||
|
||||
* `vmagent` buffers scraped data at `-remoteWrite.tmpDataPath` directory until it is sent to `-remoteWrite.url`.
|
||||
The directory can grow big when remote storage is unavailable during extended periods of time and if `-remoteWrite.maxDiskUsagePerURL` isn't set.
|
||||
If you don't want sending all the data from the directory to remote storage, just stop `vmagent` and delete the directory.
|
||||
The directory can grow large when remote storage is unavailable for extended periods of time and if `-remoteWrite.maxDiskUsagePerURL` isn't set.
|
||||
If you don't want to send all the data from the directory to remote storage, simply stop `vmagent` and delete the directory.
|
||||
|
||||
|
||||
### How to build from sources
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -39,6 +40,8 @@ var (
|
||||
"Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. "+
|
||||
"Usually :4242 must be set. Doesn't work if empty")
|
||||
opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty")
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmagent. The following files are checked: "+
|
||||
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig . See also -promscrape.config.dryRun")
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -49,9 +52,26 @@ var (
|
||||
)
|
||||
|
||||
func main() {
|
||||
// Write flags and help message to stdout, since it is easier to grep or pipe.
|
||||
flag.CommandLine.SetOutput(os.Stdout)
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
|
||||
if *dryRun {
|
||||
if err := flag.Set("promscrape.config.strictParse", "true"); err != nil {
|
||||
logger.Panicf("BUG: cannot set promscrape.config.strictParse=true: %s", err)
|
||||
}
|
||||
if err := remotewrite.CheckRelabelConfigs(); err != nil {
|
||||
logger.Fatalf("error when checking relabel configs: %s", err)
|
||||
}
|
||||
if err := promscrape.CheckConfig(); err != nil {
|
||||
logger.Fatalf("error when checking Prometheus config: %s", err)
|
||||
}
|
||||
logger.Infof("all the configs are ok; exitting with 0 status code")
|
||||
return
|
||||
}
|
||||
|
||||
logger.Infof("starting vmagent at %q...", *httpListenAddr)
|
||||
startTime := time.Now()
|
||||
remotewrite.Init()
|
||||
|
||||
@@ -2,19 +2,17 @@ package remotewrite
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"encoding/base64"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/fasthttp"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
@@ -29,6 +27,8 @@ var (
|
||||
"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
|
||||
tlsCAFile = flagutil.NewArray("remoteWrite.tlsCAFile", "Optional path to TLS CA file to use for verifying connections to -remoteWrite.url. "+
|
||||
"By default system CA is used. If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
|
||||
tlsServerName = flagutil.NewArray("remoteWrite.tlsServerName", "Optional TLS server name to use for connections to -remoteWrite.url. "+
|
||||
"By default the server name from -remoteWrite.url is used. If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
|
||||
|
||||
basicAuthUsername = flagutil.NewArray("remoteWrite.basicAuth.username", "Optional basic auth username to use for -remoteWrite.url. "+
|
||||
"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
|
||||
@@ -113,7 +113,6 @@ func newClient(argIdx int, remoteWriteURL, urlLabelValue string, fq *persistentq
|
||||
Addr: host,
|
||||
Name: "vmagent",
|
||||
Dial: statDial,
|
||||
DialDualStack: netutil.TCP6Enabled(),
|
||||
IsTLS: isTLS,
|
||||
TLSConfig: tlsCfg,
|
||||
MaxConns: maxConns,
|
||||
@@ -154,37 +153,18 @@ func (c *client) MustStop() {
|
||||
}
|
||||
|
||||
func getTLSConfig(argIdx int) (*tls.Config, error) {
|
||||
var tlsRootCA *x509.CertPool
|
||||
var tlsCertificate *tls.Certificate
|
||||
certFile := tlsCertFile.GetOptionalArg(argIdx)
|
||||
keyFile := tlsKeyFile.GetOptionalArg(argIdx)
|
||||
if certFile != "" || keyFile != "" {
|
||||
cert, err := tls.LoadX509KeyPair(certFile, keyFile)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot load TLS certificate for -remoteWrite.tlsCertFile=%q and -remoteWrite.tlsKeyFile=%q: %s", certFile, keyFile, err)
|
||||
}
|
||||
tlsCertificate = &cert
|
||||
tlsConfig := &promauth.TLSConfig{
|
||||
CAFile: tlsCAFile.GetOptionalArg(argIdx),
|
||||
CertFile: tlsCertFile.GetOptionalArg(argIdx),
|
||||
KeyFile: tlsKeyFile.GetOptionalArg(argIdx),
|
||||
ServerName: tlsServerName.GetOptionalArg(argIdx),
|
||||
InsecureSkipVerify: *tlsInsecureSkipVerify,
|
||||
}
|
||||
if caFile := tlsCAFile.GetOptionalArg(argIdx); caFile != "" {
|
||||
data, err := ioutil.ReadFile(caFile)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read -remoteWrite.tlsCAFile=%q: %s", caFile, err)
|
||||
}
|
||||
tlsRootCA = x509.NewCertPool()
|
||||
if !tlsRootCA.AppendCertsFromPEM(data) {
|
||||
return nil, fmt.Errorf("cannot parse data -remoteWrite.tlsCAFile=%q", caFile)
|
||||
}
|
||||
cfg, err := promauth.NewConfig(".", nil, "", "", tlsConfig)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot populate TLS config: %s", err)
|
||||
}
|
||||
tlsCfg := &tls.Config{
|
||||
RootCAs: tlsRootCA,
|
||||
ClientSessionCache: tls.NewLRUClientSessionCache(0),
|
||||
}
|
||||
if tlsCertificate != nil {
|
||||
tlsCfg.GetClientCertificate = func(*tls.CertificateRequestInfo) (*tls.Certificate, error) {
|
||||
return tlsCertificate, nil
|
||||
}
|
||||
}
|
||||
tlsCfg.InsecureSkipVerify = *tlsInsecureSkipVerify
|
||||
tlsCfg := cfg.NewTLSConfig()
|
||||
return tlsCfg, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -15,7 +15,8 @@ import (
|
||||
|
||||
var (
|
||||
flushInterval = flag.Duration("remoteWrite.flushInterval", time.Second, "Interval for flushing the data to remote storage. "+
|
||||
"Higher value reduces network bandwidth usage at the cost of delayed push of scraped data to remote storage")
|
||||
"Higher value reduces network bandwidth usage at the cost of delayed push of scraped data to remote storage. "+
|
||||
"Minimum supported interval is 1 second")
|
||||
maxUnpackedBlockSize = flag.Int("remoteWrite.maxBlockSize", 32*1024*1024, "The maximum size in bytes of unpacked request to send to remote storage. "+
|
||||
"It shouldn't exceed -maxInsertRequestSize from VictoriaMetrics")
|
||||
)
|
||||
@@ -55,6 +56,10 @@ func (ps *pendingSeries) Push(tss []prompbmarshal.TimeSeries) {
|
||||
}
|
||||
|
||||
func (ps *pendingSeries) periodicFlusher() {
|
||||
flushSeconds := int64(flushInterval.Seconds())
|
||||
if flushSeconds <= 0 {
|
||||
flushSeconds = 1
|
||||
}
|
||||
ticker := time.NewTicker(*flushInterval)
|
||||
defer ticker.Stop()
|
||||
mustStop := false
|
||||
@@ -63,7 +68,7 @@ func (ps *pendingSeries) periodicFlusher() {
|
||||
case <-ps.stopCh:
|
||||
mustStop = true
|
||||
case <-ticker.C:
|
||||
if time.Since(ps.wr.lastFlushTime) < *flushInterval/2 {
|
||||
if fasttime.UnixTimestamp()-ps.wr.lastFlushTime < uint64(flushSeconds) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
@@ -76,7 +81,7 @@ func (ps *pendingSeries) periodicFlusher() {
|
||||
type writeRequest struct {
|
||||
wr prompbmarshal.WriteRequest
|
||||
pushBlock func(block []byte)
|
||||
lastFlushTime time.Time
|
||||
lastFlushTime uint64
|
||||
|
||||
tss []prompbmarshal.TimeSeries
|
||||
|
||||
@@ -108,7 +113,7 @@ func (wr *writeRequest) reset() {
|
||||
|
||||
func (wr *writeRequest) flush() {
|
||||
wr.wr.Timeseries = wr.tss
|
||||
wr.lastFlushTime = time.Now()
|
||||
wr.lastFlushTime = fasttime.UnixTimestamp()
|
||||
pushWriteRequest(&wr.wr, wr.pushBlock)
|
||||
wr.reset()
|
||||
}
|
||||
@@ -144,13 +149,8 @@ func (wr *writeRequest) copyTimeSeries(dst, src *prompbmarshal.TimeSeries) {
|
||||
}
|
||||
dst.Labels = labelsDst[labelsLen:]
|
||||
|
||||
samplesDst = append(samplesDst, prompbmarshal.Sample{})
|
||||
dstSample := &samplesDst[len(samplesDst)-1]
|
||||
if len(src.Samples) != 1 {
|
||||
logger.Panicf("BUG: unexpected number of samples in time series; got %d; want 1", len(src.Samples))
|
||||
}
|
||||
*dstSample = src.Samples[0]
|
||||
dst.Samples = samplesDst[len(samplesDst)-1:]
|
||||
samplesDst = append(samplesDst, src.Samples...)
|
||||
dst.Samples = samplesDst[len(samplesDst)-len(src.Samples):]
|
||||
|
||||
wr.samples = samplesDst
|
||||
wr.labels = labelsDst
|
||||
|
||||
@@ -32,6 +32,21 @@ var (
|
||||
"Disk usage is unlimited if the value is set to 0")
|
||||
)
|
||||
|
||||
// CheckRelabelConfigs checks -remoteWrite.relabelConfig and -remoteWrite.urlRelabelConfig.
|
||||
func CheckRelabelConfigs() error {
|
||||
if *relabelConfigPathGlobal != "" {
|
||||
if _, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal); err != nil {
|
||||
return fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %s", *relabelConfigPathGlobal, err)
|
||||
}
|
||||
}
|
||||
for _, path := range *relabelConfigPaths {
|
||||
if _, err := promrelabel.LoadRelabelConfigs(path); err != nil {
|
||||
return fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %s", path, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var rwctxs []*remoteWriteCtx
|
||||
|
||||
// Init initializes remotewrite.
|
||||
@@ -86,7 +101,7 @@ func Stop() {
|
||||
|
||||
// Push sends wr to remote storage systems set via `-remoteWrite.url`.
|
||||
//
|
||||
// Each timeseries in wr.Timeseries must contain one sample.
|
||||
// Note that wr may be modified by Push due to relabeling.
|
||||
func Push(wr *prompbmarshal.WriteRequest) {
|
||||
var rctx *relabelCtx
|
||||
if len(prcsGlobal) > 0 || len(labelsGlobal) > 0 {
|
||||
@@ -128,6 +143,8 @@ type remoteWriteCtx struct {
|
||||
pss []*pendingSeries
|
||||
pssNextIdx uint64
|
||||
|
||||
tss []prompbmarshal.TimeSeries
|
||||
|
||||
relabelMetricsDropped *metrics.Counter
|
||||
}
|
||||
|
||||
@@ -181,6 +198,11 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
var rctx *relabelCtx
|
||||
if len(rwctx.prcs) > 0 {
|
||||
// Make a copy of tss before applying relabeling in order to prevent
|
||||
// from affecting time series for other remoteWrite.url configs.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/467 for details.
|
||||
rwctx.tss = append(rwctx.tss[:0], tss...)
|
||||
tss = rwctx.tss
|
||||
rctx = getRelabelCtx()
|
||||
tssLen := len(tss)
|
||||
tss = rctx.applyRelabeling(tss, nil, rwctx.prcs)
|
||||
@@ -191,5 +213,7 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
pss[idx].Push(tss)
|
||||
if rctx != nil {
|
||||
putRelabelCtx(rctx)
|
||||
// Zero rwctx.tss in order to free up GC references.
|
||||
rwctx.tss = prompbmarshal.ResetTimeSeries(rwctx.tss)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,12 +4,17 @@ import (
|
||||
"net"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/fasthttp"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
func statDial(addr string) (net.Conn, error) {
|
||||
conn, err := fasthttp.Dial(addr)
|
||||
func statDial(addr string) (conn net.Conn, err error) {
|
||||
if netutil.TCP6Enabled() {
|
||||
conn, err = fasthttp.DialDualStack(addr)
|
||||
} else {
|
||||
conn, err = fasthttp.Dial(addr)
|
||||
}
|
||||
dialsTotal.Inc()
|
||||
if err != nil {
|
||||
dialErrors.Inc()
|
||||
|
||||
@@ -52,14 +52,16 @@ publish-vmalert:
|
||||
APP_NAME=vmalert $(MAKE) publish-via-docker
|
||||
|
||||
test-vmalert:
|
||||
go test -race -cover ./app/vmalert
|
||||
go test -v -race -cover ./app/vmalert -loggerLevel=ERROR
|
||||
go test -v -race -cover ./app/vmalert/datasource
|
||||
go test -v -race -cover ./app/vmalert/notifier
|
||||
|
||||
run-vmalert: vmalert
|
||||
./bin/vmalert -rule=app/vmalert/testdata/rules0-good.rules \
|
||||
-datasource.url=http://localhost:8428 \
|
||||
-notifier.url=http://localhost:9093 \
|
||||
-remotewrite.url=http://localhost:8428 \
|
||||
-remoteread.url=http://localhost:8428 \
|
||||
-remoteWrite.url=http://localhost:8428 \
|
||||
-remoteRead.url=http://localhost:8428 \
|
||||
-evaluationInterval=3s
|
||||
|
||||
vmalert-amd64:
|
||||
|
||||
@@ -13,7 +13,7 @@ sends alerts to [Alert Manager](https://github.com/prometheus/alertmanager).
|
||||
* Lightweight without extra dependencies.
|
||||
|
||||
### TODO:
|
||||
* Configuration hot reload.
|
||||
* Support recording rules.
|
||||
|
||||
### QuickStart
|
||||
|
||||
@@ -48,10 +48,11 @@ Rules in group evaluated one-by-one sequentially.
|
||||
* `http://<vmalert-addr>/api/v1/<groupName>/<alertID>/status" ` - get alert status by ID.
|
||||
Used as alert source in AlertManager.
|
||||
* `http://<vmalert-addr>/metrics` - application metrics.
|
||||
* `http://<vmalert-addr>/-/reload` - hot configuration reload.
|
||||
|
||||
`vmalert` may be configured with `-remotewrite` flag to write alerts state in form of timeseries
|
||||
`vmalert` may be configured with `-remoteWrite` flag to write alerts state in form of timeseries
|
||||
via remote write protocol. Alerts state will be written as `ALERTS` timeseries. These timeseries
|
||||
may be used to recover alerts state on `vmalert` restarts if `-remoteread` is configured.
|
||||
may be used to recover alerts state on `vmalert` restarts if `-remoteRead` is configured.
|
||||
|
||||
|
||||
### Configuration
|
||||
@@ -81,19 +82,21 @@ Usage of vmalert:
|
||||
Address to listen for http connections (default ":8880")
|
||||
-notifier.url string
|
||||
Prometheus alertmanager URL. Required parameter. e.g. http://127.0.0.1:9093
|
||||
-remoteread.basicAuth.password string
|
||||
Optional basic auth password for -remoteread.url
|
||||
-remoteread.basicAuth.username string
|
||||
Optional basic auth username for -remoteread.url
|
||||
-remoteread.lookback duration
|
||||
-remoteRead.basicAuth.password string
|
||||
Optional basic auth password for -remoteRead.url
|
||||
-remoteRead.basicAuth.username string
|
||||
Optional basic auth username for -remoteRead.url
|
||||
-remoteRead.lookback duration
|
||||
Lookback defines how far to look into past for alerts timeseries. For example, if lookback=1h then range from now() to now()-1h will be scanned. (default 1h0m0s)
|
||||
-remoteread.url vmalert
|
||||
Optional URL to Victoria Metrics or VMSelect that will be used to restore alerts state. This configuration makes sense only if vmalert was configured with `remotewrite.url` before and has been successfully persisted its state. E.g. http://127.0.0.1:8428
|
||||
-remotewrite.basicAuth.password string
|
||||
Optional basic auth password for -remotewrite.url
|
||||
-remotewrite.basicAuth.username string
|
||||
Optional basic auth username for -remotewrite.url
|
||||
-remotewrite.url string
|
||||
-remoteRead.url vmalert
|
||||
Optional URL to Victoria Metrics or VMSelect that will be used to restore alerts state. This configuration makes sense only if vmalert was configured with `remoteWrite.url` before and has been successfully persisted its state. E.g. http://127.0.0.1:8428
|
||||
-remoteWrite.basicAuth.password string
|
||||
Optional basic auth password for -remoteWrite.url
|
||||
-remoteWrite.basicAuth.username string
|
||||
Optional basic auth username for -remoteWrite.url
|
||||
-remoteWrite.maxQueueSize
|
||||
Defines the max number of pending datapoints to remote write endpoint
|
||||
-remoteWrite.url string
|
||||
Optional URL to Victoria Metrics or VMInsert where to persist alerts state in form of timeseries. E.g. http://127.0.0.1:8428
|
||||
-rule value
|
||||
Path to the file with alert rules.
|
||||
@@ -109,8 +112,30 @@ Usage of vmalert:
|
||||
Pass `-help` to `vmalert` in order to see the full list of supported
|
||||
command-line flags with their descriptions.
|
||||
|
||||
To reload configuration without `vmalert` restart send SIGHUP signal
|
||||
or send GET request to `/-/reload` endpoint.
|
||||
|
||||
### Contributing
|
||||
|
||||
`vmalert` is mostly designed and built by VictoriaMetrics community.
|
||||
Feel free to share your experience and ideas for improving this
|
||||
software. Please keep simplicity as the main priority.
|
||||
|
||||
### How to build from sources
|
||||
|
||||
It is recommended using
|
||||
[binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
||||
- `vmalert` is located in `vmutils-*` archives there.
|
||||
|
||||
|
||||
#### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmalert` from the root folder of the repository.
|
||||
It builds `vmalert` binary and puts it into the `bin` folder.
|
||||
|
||||
#### Production build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make vmalert-prod` from the root folder of the repository.
|
||||
It builds `vmalert-prod` binary and puts it into the `bin` folder.
|
||||
|
||||
@@ -27,29 +27,33 @@ func Parse(pathPatterns []string, validateAnnotations bool) ([]Group, error) {
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("file %s: %w", file, err)
|
||||
}
|
||||
for _, group := range gr {
|
||||
if _, ok := groupsNames[group.Name]; ok {
|
||||
return nil, fmt.Errorf("one file can not contain groups with the same name %s, filepath:%s", file, group.Name)
|
||||
for _, g := range gr {
|
||||
if _, ok := groupsNames[g.Name]; ok {
|
||||
return nil, fmt.Errorf("one file can not contain groups with the same name %s, filepath:%s", g.Name, file)
|
||||
}
|
||||
groupsNames[group.Name] = struct{}{}
|
||||
for _, rule := range group.Rules {
|
||||
g.File = file
|
||||
g.doneCh = make(chan struct{})
|
||||
g.finishedCh = make(chan struct{})
|
||||
g.updateCh = make(chan Group)
|
||||
|
||||
groupsNames[g.Name] = struct{}{}
|
||||
for _, rule := range g.Rules {
|
||||
if err = rule.Validate(); err != nil {
|
||||
return nil, fmt.Errorf("invalid rule filepath:%s, group %s:%w", file, group.Name, err)
|
||||
return nil, fmt.Errorf("invalid rule filepath: %s, group %s: %w", file, g.Name, err)
|
||||
}
|
||||
// TODO: this init looks weird here
|
||||
rule.alerts = make(map[uint64]*notifier.Alert)
|
||||
if validateAnnotations {
|
||||
if err = notifier.ValidateTemplates(rule.Annotations); err != nil {
|
||||
return nil, fmt.Errorf("invalid annotations filepath:%s, group %s:%w", file, group.Name, err)
|
||||
return nil, fmt.Errorf("invalid annotations filepath: %s, group %s: %w", file, g.Name, err)
|
||||
}
|
||||
if err = notifier.ValidateTemplates(rule.Labels); err != nil {
|
||||
return nil, fmt.Errorf("invalid labels filepath:%s, group %s:%w", file, group.Name, err)
|
||||
return nil, fmt.Errorf("invalid labels filepath: %s, group %s: %w", file, g.Name, err)
|
||||
}
|
||||
}
|
||||
rule.group = group
|
||||
rule.group = g
|
||||
rule.alerts = make(map[uint64]*notifier.Alert)
|
||||
}
|
||||
groups = append(groups, g)
|
||||
}
|
||||
groups = append(groups, gr...)
|
||||
}
|
||||
if len(groups) < 1 {
|
||||
return nil, fmt.Errorf("no groups found in %s", strings.Join(pathPatterns, ";"))
|
||||
|
||||
197
app/vmalert/group.go
Normal file
197
app/vmalert/group.go
Normal file
@@ -0,0 +1,197 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
// Group is an entity for grouping rules
|
||||
type Group struct {
|
||||
Name string
|
||||
File string
|
||||
Rules []*Rule
|
||||
|
||||
doneCh chan struct{}
|
||||
finishedCh chan struct{}
|
||||
// channel accepts new Group obj
|
||||
// which supposed to update current group
|
||||
updateCh chan Group
|
||||
}
|
||||
|
||||
// ID return unique group ID that consists of
|
||||
// rules file and group name
|
||||
func (g *Group) ID() uint64 {
|
||||
hash := fnv.New64a()
|
||||
hash.Write([]byte(g.File))
|
||||
hash.Write([]byte("\xff"))
|
||||
hash.Write([]byte(g.Name))
|
||||
return hash.Sum64()
|
||||
}
|
||||
|
||||
// Restore restores alerts state for all group rules with For > 0
|
||||
func (g *Group) Restore(ctx context.Context, q datasource.Querier, lookback time.Duration) error {
|
||||
for _, rule := range g.Rules {
|
||||
if rule.For == 0 {
|
||||
return nil
|
||||
}
|
||||
if err := rule.Restore(ctx, q, lookback); err != nil {
|
||||
return fmt.Errorf("error while restoring rule %q: %s", rule.Name, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// updateWith updates existing group with
|
||||
// passed group object.
|
||||
// Not thread-safe.
|
||||
func (g *Group) updateWith(newGroup Group) {
|
||||
rulesRegistry := make(map[string]*Rule)
|
||||
for _, nr := range newGroup.Rules {
|
||||
rulesRegistry[nr.id()] = nr
|
||||
}
|
||||
|
||||
for i, or := range g.Rules {
|
||||
nr, ok := rulesRegistry[or.id()]
|
||||
if !ok {
|
||||
// old rule is not present in the new list
|
||||
// so we mark it for removing
|
||||
g.Rules[i] = nil
|
||||
continue
|
||||
}
|
||||
|
||||
// copy all significant fields.
|
||||
// alerts state isn't copied since
|
||||
// it should be updated in next 2 Execs
|
||||
or.For = nr.For
|
||||
or.Expr = nr.Expr
|
||||
or.Labels = nr.Labels
|
||||
or.Annotations = nr.Annotations
|
||||
delete(rulesRegistry, nr.id())
|
||||
}
|
||||
|
||||
var newRules []*Rule
|
||||
for _, r := range g.Rules {
|
||||
if r == nil {
|
||||
// skip nil rules
|
||||
continue
|
||||
}
|
||||
newRules = append(newRules, r)
|
||||
}
|
||||
// add the rest of rules from registry
|
||||
for _, nr := range rulesRegistry {
|
||||
newRules = append(newRules, nr)
|
||||
}
|
||||
g.Rules = newRules
|
||||
}
|
||||
|
||||
var (
|
||||
iterationTotal = metrics.NewCounter(`vmalert_iteration_total`)
|
||||
iterationDuration = metrics.NewSummary(`vmalert_iteration_duration_seconds`)
|
||||
|
||||
execTotal = metrics.NewCounter(`vmalert_execution_total`)
|
||||
execErrors = metrics.NewCounter(`vmalert_execution_errors_total`)
|
||||
execDuration = metrics.NewSummary(`vmalert_execution_duration_seconds`)
|
||||
|
||||
alertsFired = metrics.NewCounter(`vmalert_alerts_fired_total`)
|
||||
alertsSent = metrics.NewCounter(`vmalert_alerts_sent_total`)
|
||||
alertsSendErrors = metrics.NewCounter(`vmalert_alerts_send_errors_total`)
|
||||
|
||||
remoteWriteSent = metrics.NewCounter(`vmalert_remotewrite_sent_total`)
|
||||
remoteWriteErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
|
||||
)
|
||||
|
||||
func (g *Group) close() {
|
||||
if g.doneCh == nil {
|
||||
return
|
||||
}
|
||||
close(g.doneCh)
|
||||
<-g.finishedCh
|
||||
}
|
||||
|
||||
func (g *Group) start(ctx context.Context, interval time.Duration,
|
||||
querier datasource.Querier, nr notifier.Notifier, rw *remotewrite.Client) {
|
||||
logger.Infof("group %q started", g.Name)
|
||||
t := time.NewTicker(interval)
|
||||
defer t.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
logger.Infof("group %q: context cancelled", g.Name)
|
||||
close(g.finishedCh)
|
||||
return
|
||||
case <-g.doneCh:
|
||||
logger.Infof("group %q: received stop signal", g.Name)
|
||||
close(g.finishedCh)
|
||||
return
|
||||
case ng := <-g.updateCh:
|
||||
g.updateWith(ng)
|
||||
case <-t.C:
|
||||
iterationTotal.Inc()
|
||||
iterationStart := time.Now()
|
||||
for _, rule := range g.Rules {
|
||||
execTotal.Inc()
|
||||
|
||||
execStart := time.Now()
|
||||
err := rule.Exec(ctx, querier)
|
||||
execDuration.UpdateDuration(execStart)
|
||||
|
||||
if err != nil {
|
||||
execErrors.Inc()
|
||||
logger.Errorf("failed to execute rule %q.%q: %s", g.Name, rule.Name, err)
|
||||
continue
|
||||
}
|
||||
|
||||
var alertsToSend []notifier.Alert
|
||||
for _, a := range rule.alerts {
|
||||
switch a.State {
|
||||
case notifier.StateFiring:
|
||||
// set End to execStart + 3 intervals
|
||||
// so notifier can resolve it automatically if `vmalert`
|
||||
// won't be able to send resolve for some reason
|
||||
a.End = execStart.Add(3 * interval)
|
||||
alertsToSend = append(alertsToSend, *a)
|
||||
pushToRW(rw, rule, a, execStart)
|
||||
case notifier.StatePending:
|
||||
pushToRW(rw, rule, a, execStart)
|
||||
case notifier.StateInactive:
|
||||
// set End to execStart to notify
|
||||
// that it was just resolved
|
||||
a.End = execStart
|
||||
alertsToSend = append(alertsToSend, *a)
|
||||
}
|
||||
}
|
||||
if len(alertsToSend) == 0 {
|
||||
continue
|
||||
}
|
||||
alertsSent.Add(len(alertsToSend))
|
||||
if err := nr.Send(ctx, alertsToSend); err != nil {
|
||||
alertsSendErrors.Inc()
|
||||
logger.Errorf("failed to send alert for rule %q.%q: %s", g.Name, rule.Name, err)
|
||||
}
|
||||
}
|
||||
iterationDuration.UpdateDuration(iterationStart)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func pushToRW(rw *remotewrite.Client, rule *Rule, a *notifier.Alert, timestamp time.Time) {
|
||||
if rw == nil {
|
||||
return
|
||||
}
|
||||
tss := rule.AlertToTimeSeries(a, timestamp)
|
||||
remoteWriteSent.Add(len(tss))
|
||||
for _, ts := range tss {
|
||||
if err := rw.Push(ts); err != nil {
|
||||
remoteWriteErrors.Inc()
|
||||
logger.Errorf("failed to push timeseries to remotewrite: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
217
app/vmalert/group_test.go
Normal file
217
app/vmalert/group_test.go
Normal file
@@ -0,0 +1,217 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"reflect"
|
||||
"sort"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
)
|
||||
|
||||
func TestUpdateWith(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
currentRules []*Rule
|
||||
// rules must be sorted by name
|
||||
newRules []*Rule
|
||||
}{
|
||||
{
|
||||
"new rule",
|
||||
[]*Rule{},
|
||||
[]*Rule{{Name: "bar"}},
|
||||
},
|
||||
{
|
||||
"update rule",
|
||||
[]*Rule{{
|
||||
Name: "foo",
|
||||
Expr: "up > 0",
|
||||
For: time.Second,
|
||||
Labels: map[string]string{
|
||||
"bar": "baz",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": "{{ $value|humanize }}",
|
||||
"description": "{{$labels}}",
|
||||
},
|
||||
}},
|
||||
[]*Rule{{
|
||||
Name: "bar",
|
||||
Expr: "up > 10",
|
||||
For: time.Second,
|
||||
Labels: map[string]string{
|
||||
"baz": "bar",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": "none",
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
"empty rule",
|
||||
[]*Rule{{Name: "foo"}},
|
||||
[]*Rule{},
|
||||
},
|
||||
{
|
||||
"multiple rules",
|
||||
[]*Rule{{Name: "bar"}, {Name: "baz"}, {Name: "foo"}},
|
||||
[]*Rule{{Name: "baz"}, {Name: "foo"}},
|
||||
},
|
||||
{
|
||||
"replace rule",
|
||||
[]*Rule{{Name: "foo1"}},
|
||||
[]*Rule{{Name: "foo2"}},
|
||||
},
|
||||
{
|
||||
"replace multiple rules",
|
||||
[]*Rule{{Name: "foo1"}, {Name: "foo2"}},
|
||||
[]*Rule{{Name: "foo3"}, {Name: "foo4"}},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
g := &Group{Rules: tc.currentRules}
|
||||
g.updateWith(Group{Rules: tc.newRules})
|
||||
|
||||
if len(g.Rules) != len(tc.newRules) {
|
||||
t.Fatalf("expected to have %d rules; got: %d",
|
||||
len(g.Rules), len(tc.newRules))
|
||||
}
|
||||
sort.Slice(g.Rules, func(i, j int) bool {
|
||||
return g.Rules[i].Name < g.Rules[j].Name
|
||||
})
|
||||
for i, r := range g.Rules {
|
||||
got, want := r, tc.newRules[i]
|
||||
if got.Name != want.Name {
|
||||
t.Fatalf("expected to have rule %q; got %q", want.Name, got.Name)
|
||||
}
|
||||
if got.Expr != want.Expr {
|
||||
t.Fatalf("expected to have expression %q; got %q", want.Expr, got.Expr)
|
||||
}
|
||||
if got.For != want.For {
|
||||
t.Fatalf("expected to have for %q; got %q", want.For, got.For)
|
||||
}
|
||||
if !reflect.DeepEqual(got.Annotations, want.Annotations) {
|
||||
t.Fatalf("expected to have annotations %#v; got %#v", want.Annotations, got.Annotations)
|
||||
}
|
||||
if !reflect.DeepEqual(got.Labels, want.Labels) {
|
||||
t.Fatalf("expected to have labels %#v; got %#v", want.Labels, got.Labels)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGroupStart(t *testing.T) {
|
||||
// TODO: make parsing from string instead of file
|
||||
groups, err := Parse([]string{"testdata/rules1-good.rules"}, true)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse rules: %s", err)
|
||||
}
|
||||
g := groups[0]
|
||||
|
||||
fn := &fakeNotifier{}
|
||||
fs := &fakeQuerier{}
|
||||
|
||||
const inst1, inst2, job = "foo", "bar", "baz"
|
||||
m1 := metricWithLabels(t, "instance", inst1, "job", job)
|
||||
m2 := metricWithLabels(t, "instance", inst2, "job", job)
|
||||
|
||||
r := g.Rules[0]
|
||||
alert1, err := r.newAlert(m1)
|
||||
if err != nil {
|
||||
t.Fatalf("faield to create alert: %s", err)
|
||||
}
|
||||
alert1.State = notifier.StateFiring
|
||||
alert1.ID = hash(m1)
|
||||
|
||||
alert2, err := r.newAlert(m2)
|
||||
if err != nil {
|
||||
t.Fatalf("faield to create alert: %s", err)
|
||||
}
|
||||
alert2.State = notifier.StateFiring
|
||||
alert2.ID = hash(m2)
|
||||
|
||||
const evalInterval = time.Millisecond
|
||||
finished := make(chan struct{})
|
||||
fs.add(m1)
|
||||
fs.add(m2)
|
||||
go func() {
|
||||
g.start(context.Background(), evalInterval, fs, fn, nil)
|
||||
close(finished)
|
||||
}()
|
||||
|
||||
// wait for multiple evals
|
||||
time.Sleep(20 * evalInterval)
|
||||
|
||||
gotAlerts := fn.getAlerts()
|
||||
expectedAlerts := []notifier.Alert{*alert1, *alert2}
|
||||
compareAlerts(t, expectedAlerts, gotAlerts)
|
||||
|
||||
// reset previous data
|
||||
fs.reset()
|
||||
// and set only one datapoint for response
|
||||
fs.add(m1)
|
||||
|
||||
// wait for multiple evals
|
||||
time.Sleep(20 * evalInterval)
|
||||
|
||||
gotAlerts = fn.getAlerts()
|
||||
expectedAlerts = []notifier.Alert{*alert1}
|
||||
compareAlerts(t, expectedAlerts, gotAlerts)
|
||||
|
||||
g.close()
|
||||
<-finished
|
||||
}
|
||||
|
||||
func compareAlerts(t *testing.T, as, bs []notifier.Alert) {
|
||||
t.Helper()
|
||||
if len(as) != len(bs) {
|
||||
t.Fatalf("expected to have length %d; got %d", len(as), len(bs))
|
||||
}
|
||||
sort.Slice(as, func(i, j int) bool {
|
||||
return as[i].ID < as[j].ID
|
||||
})
|
||||
sort.Slice(bs, func(i, j int) bool {
|
||||
return bs[i].ID < bs[j].ID
|
||||
})
|
||||
for i := range as {
|
||||
a, b := as[i], bs[i]
|
||||
if a.Name != b.Name {
|
||||
t.Fatalf("expected t have Name %q; got %q", a.Name, b.Name)
|
||||
}
|
||||
if a.State != b.State {
|
||||
t.Fatalf("expected t have State %q; got %q", a.State, b.State)
|
||||
}
|
||||
if a.Value != b.Value {
|
||||
t.Fatalf("expected t have Value %f; got %f", a.Value, b.Value)
|
||||
}
|
||||
if !reflect.DeepEqual(a.Annotations, b.Annotations) {
|
||||
t.Fatalf("expected to have annotations %#v; got %#v", a.Annotations, b.Annotations)
|
||||
}
|
||||
if !reflect.DeepEqual(a.Labels, b.Labels) {
|
||||
t.Fatalf("expected to have labels %#v; got %#v", a.Labels, b.Labels)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type fakeNotifier struct {
|
||||
sync.Mutex
|
||||
alerts []notifier.Alert
|
||||
}
|
||||
|
||||
func (fn *fakeNotifier) Send(_ context.Context, alerts []notifier.Alert) error {
|
||||
fn.Lock()
|
||||
defer fn.Unlock()
|
||||
fn.alerts = alerts
|
||||
return nil
|
||||
}
|
||||
|
||||
func (fn *fakeNotifier) getAlerts() []notifier.Alert {
|
||||
fn.Lock()
|
||||
defer fn.Unlock()
|
||||
return fn.alerts
|
||||
}
|
||||
@@ -8,7 +8,6 @@ import (
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
@@ -16,6 +15,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
@@ -38,17 +38,18 @@ absolute path to all .yaml files in root.`)
|
||||
basicAuthUsername = flag.String("datasource.basicAuth.username", "", "Optional basic auth username for -datasource.url")
|
||||
basicAuthPassword = flag.String("datasource.basicAuth.password", "", "Optional basic auth password for -datasource.url")
|
||||
|
||||
remoteWriteURL = flag.String("remotewrite.url", "", "Optional URL to Victoria Metrics or VMInsert where to persist alerts state"+
|
||||
remoteWriteURL = flag.String("remoteWrite.url", "", "Optional URL to Victoria Metrics or VMInsert where to persist alerts state"+
|
||||
" in form of timeseries. E.g. http://127.0.0.1:8428")
|
||||
remoteWriteUsername = flag.String("remotewrite.basicAuth.username", "", "Optional basic auth username for -remotewrite.url")
|
||||
remoteWritePassword = flag.String("remotewrite.basicAuth.password", "", "Optional basic auth password for -remotewrite.url")
|
||||
remoteWriteUsername = flag.String("remoteWrite.basicAuth.username", "", "Optional basic auth username for -remoteWrite.url")
|
||||
remoteWritePassword = flag.String("remoteWrite.basicAuth.password", "", "Optional basic auth password for -remoteWrite.url")
|
||||
remoteWriteMaxQueueSize = flag.Int("remoteWrite.maxQueueSize", 10e3, "Defines the max number of pending datapoints to remote write endpoint")
|
||||
|
||||
remoteReadURL = flag.String("remoteread.url", "", "Optional URL to Victoria Metrics or VMSelect that will be used to restore alerts"+
|
||||
" state. This configuration makes sense only if `vmalert` was configured with `remotewrite.url` before and has been successfully persisted its state."+
|
||||
remoteReadURL = flag.String("remoteRead.url", "", "Optional URL to Victoria Metrics or VMSelect that will be used to restore alerts"+
|
||||
" state. This configuration makes sense only if `vmalert` was configured with `remoteWrite.url` before and has been successfully persisted its state."+
|
||||
" E.g. http://127.0.0.1:8428")
|
||||
remoteReadUsername = flag.String("remoteread.basicAuth.username", "", "Optional basic auth username for -remoteread.url")
|
||||
remoteReadPassword = flag.String("remoteread.basicAuth.password", "", "Optional basic auth password for -remoteread.url")
|
||||
remoteReadLookBack = flag.Duration("remoteread.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries."+
|
||||
remoteReadUsername = flag.String("remoteRead.basicAuth.username", "", "Optional basic auth username for -remoteRead.url")
|
||||
remoteReadPassword = flag.String("remoteRead.basicAuth.password", "", "Optional basic auth password for -remoteRead.url")
|
||||
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries."+
|
||||
" For example, if lookback=1h then range from now() to now()-1h will be scanned.")
|
||||
|
||||
evaluationInterval = flag.Duration("evaluationInterval", time.Minute, "How often to evaluate the rules. Default 1m")
|
||||
@@ -56,8 +57,9 @@ absolute path to all .yaml files in root.`)
|
||||
externalURL = flag.String("external.url", "", "External URL is used as alert's source for sent alerts to the notifier")
|
||||
)
|
||||
|
||||
// TODO: hot configuration reload
|
||||
func main() {
|
||||
// Write flags and help message to stdout, since it is easier to grep or pipe.
|
||||
flag.CommandLine.SetOutput(os.Stdout)
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
@@ -65,26 +67,21 @@ func main() {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
eu, err := getExternalURL(*externalURL, *httpListenAddr, httpserver.IsTLS())
|
||||
if err != nil {
|
||||
logger.Fatalf("can not get external url:%s ", err)
|
||||
logger.Fatalf("can not get external url: %s ", err)
|
||||
}
|
||||
notifier.InitTemplateFunc(eu)
|
||||
|
||||
logger.Infof("reading alert rules configuration file from %s", strings.Join(*rulePath, ";"))
|
||||
groups, err := Parse(*rulePath, *validateTemplates)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot parse configuration file: %s", err)
|
||||
}
|
||||
|
||||
w := &watchdog{
|
||||
manager := &manager{
|
||||
groups: make(map[uint64]*Group),
|
||||
storage: datasource.NewVMStorage(*datasourceURL, *basicAuthUsername, *basicAuthPassword, &http.Client{}),
|
||||
alertProvider: notifier.NewAlertManager(*notifierURL, func(group, name string) string {
|
||||
return fmt.Sprintf("%s/api/v1/%s/%s/status", eu, group, name)
|
||||
notifier: notifier.NewAlertManager(*notifierURL, func(group, alert string) string {
|
||||
return fmt.Sprintf("%s/api/v1/%s/%s/status", eu, group, alert)
|
||||
}, &http.Client{}),
|
||||
}
|
||||
|
||||
if *remoteWriteURL != "" {
|
||||
c, err := remotewrite.NewClient(ctx, remotewrite.Config{
|
||||
Addr: *remoteWriteURL,
|
||||
MaxQueueSize: *remoteWriteMaxQueueSize,
|
||||
FlushInterval: *evaluationInterval,
|
||||
BasicAuthUser: *remoteWriteUsername,
|
||||
BasicAuthPass: *remoteWritePassword,
|
||||
@@ -92,30 +89,39 @@ func main() {
|
||||
if err != nil {
|
||||
logger.Fatalf("failed to init remotewrite client: %s", err)
|
||||
}
|
||||
w.rw = c
|
||||
manager.rw = c
|
||||
}
|
||||
|
||||
var restoreDS *datasource.VMStorage
|
||||
if *remoteReadURL != "" {
|
||||
restoreDS = datasource.NewVMStorage(*remoteReadURL, *remoteReadUsername, *remoteReadPassword, &http.Client{})
|
||||
manager.rr = datasource.NewVMStorage(*remoteReadURL, *remoteReadUsername, *remoteReadPassword, &http.Client{})
|
||||
}
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
for _, g := range groups {
|
||||
if restoreDS != nil {
|
||||
err := g.Restore(ctx, restoreDS, *remoteReadLookBack)
|
||||
if err != nil {
|
||||
logger.Errorf("error while restoring state for group %q: %s", g.Name, err)
|
||||
if err := manager.start(ctx, *rulePath, *validateTemplates); err != nil {
|
||||
logger.Fatalf("failed to start: %s", err)
|
||||
}
|
||||
|
||||
go func() {
|
||||
// init reload metrics with positive values to improve alerting conditions
|
||||
configSuccess.Set(1)
|
||||
configTimestamp.Set(fasttime.UnixTimestamp())
|
||||
sigHup := procutil.NewSighupChan()
|
||||
for {
|
||||
<-sigHup
|
||||
configReloads.Inc()
|
||||
logger.Infof("SIGHUP received. Going to reload rules %q ...", *rulePath)
|
||||
if err := manager.update(ctx, *rulePath, *validateTemplates, false); err != nil {
|
||||
configReloadErrors.Inc()
|
||||
configSuccess.Set(0)
|
||||
logger.Errorf("error while reloading rules: %s", err)
|
||||
continue
|
||||
}
|
||||
configSuccess.Set(1)
|
||||
configTimestamp.Set(fasttime.UnixTimestamp())
|
||||
logger.Infof("Rules reloaded successfully from %q", *rulePath)
|
||||
}
|
||||
wg.Add(1)
|
||||
go func(group Group) {
|
||||
w.run(ctx, group, *evaluationInterval)
|
||||
wg.Done()
|
||||
}(g)
|
||||
}
|
||||
}()
|
||||
|
||||
go httpserver.Serve(*httpListenAddr, (&requestHandler{groups: groups}).handler)
|
||||
rh := &requestHandler{m: manager}
|
||||
go httpserver.Serve(*httpListenAddr, rh.handler)
|
||||
|
||||
sig := procutil.WaitForSigterm()
|
||||
logger.Infof("service received signal %s", sig)
|
||||
@@ -123,91 +129,16 @@ func main() {
|
||||
logger.Fatalf("cannot stop the webservice: %s", err)
|
||||
}
|
||||
cancel()
|
||||
if w.rw != nil {
|
||||
err := w.rw.Close()
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot stop the remotewrite: %s", err)
|
||||
}
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
type watchdog struct {
|
||||
storage *datasource.VMStorage
|
||||
alertProvider notifier.Notifier
|
||||
rw *remotewrite.Client
|
||||
manager.close()
|
||||
}
|
||||
|
||||
var (
|
||||
iterationTotal = metrics.NewCounter(`vmalert_iteration_total`)
|
||||
iterationDuration = metrics.NewSummary(`vmalert_iteration_duration_seconds`)
|
||||
|
||||
execTotal = metrics.NewCounter(`vmalert_execution_total`)
|
||||
execErrors = metrics.NewCounter(`vmalert_execution_errors_total`)
|
||||
execDuration = metrics.NewSummary(`vmalert_execution_duration_seconds`)
|
||||
|
||||
alertsFired = metrics.NewCounter(`vmalert_alerts_fired_total`)
|
||||
alertsSent = metrics.NewCounter(`vmalert_alerts_sent_total`)
|
||||
alertsSendErrors = metrics.NewCounter(`vmalert_alerts_send_errors_total`)
|
||||
|
||||
remoteWriteSent = metrics.NewCounter(`vmalert_remotewrite_sent_total`)
|
||||
remoteWriteErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
|
||||
configReloads = metrics.NewCounter(`vmalert_config_last_reload_total`)
|
||||
configReloadErrors = metrics.NewCounter(`vmalert_config_last_reload_errors_total`)
|
||||
configSuccess = metrics.NewCounter(`vmalert_config_last_reload_successful`)
|
||||
configTimestamp = metrics.NewCounter(`vmalert_config_last_reload_success_timestamp_seconds`)
|
||||
)
|
||||
|
||||
func (w *watchdog) run(ctx context.Context, group Group, evaluationInterval time.Duration) {
|
||||
logger.Infof("watchdog for %s has been started", group.Name)
|
||||
t := time.NewTicker(evaluationInterval)
|
||||
defer t.Stop()
|
||||
for {
|
||||
|
||||
select {
|
||||
case <-t.C:
|
||||
iterationTotal.Inc()
|
||||
iterationStart := time.Now()
|
||||
for _, rule := range group.Rules {
|
||||
execTotal.Inc()
|
||||
|
||||
execStart := time.Now()
|
||||
err := rule.Exec(ctx, w.storage)
|
||||
execDuration.UpdateDuration(execStart)
|
||||
|
||||
if err != nil {
|
||||
execErrors.Inc()
|
||||
logger.Errorf("failed to execute rule %q.%q: %s", group.Name, rule.Name, err)
|
||||
continue
|
||||
}
|
||||
|
||||
var alertsToSend []notifier.Alert
|
||||
for _, a := range rule.alerts {
|
||||
if a.State != notifier.StatePending {
|
||||
alertsToSend = append(alertsToSend, *a)
|
||||
}
|
||||
if a.State == notifier.StateInactive || w.rw == nil {
|
||||
continue
|
||||
}
|
||||
tss := rule.AlertToTimeSeries(a, execStart)
|
||||
for _, ts := range tss {
|
||||
remoteWriteSent.Inc()
|
||||
if err := w.rw.Push(ts); err != nil {
|
||||
remoteWriteErrors.Inc()
|
||||
logger.Errorf("failed to push timeseries to remotewrite: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
alertsSent.Add(len(alertsToSend))
|
||||
if err := w.alertProvider.Send(alertsToSend); err != nil {
|
||||
alertsSendErrors.Inc()
|
||||
logger.Errorf("failed to send alert for rule %q.%q: %s", group.Name, rule.Name, err)
|
||||
}
|
||||
}
|
||||
iterationDuration.UpdateDuration(iterationStart)
|
||||
case <-ctx.Done():
|
||||
logger.Infof("%s received stop signal", group.Name)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func getExternalURL(externalURL, httpListenAddr string, isSecure bool) (*url.URL, error) {
|
||||
if externalURL != "" {
|
||||
return url.Parse(externalURL)
|
||||
|
||||
108
app/vmalert/manager.go
Normal file
108
app/vmalert/manager.go
Normal file
@@ -0,0 +1,108 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
type manager struct {
|
||||
storage datasource.Querier
|
||||
notifier notifier.Notifier
|
||||
|
||||
rw *remotewrite.Client
|
||||
rr datasource.Querier
|
||||
|
||||
wg sync.WaitGroup
|
||||
|
||||
groupsMu sync.RWMutex
|
||||
groups map[uint64]*Group
|
||||
}
|
||||
|
||||
// AlertAPI generates APIAlert object from alert by its id(hash)
|
||||
func (m *manager) AlertAPI(gID, aID uint64) (*APIAlert, error) {
|
||||
m.groupsMu.RLock()
|
||||
defer m.groupsMu.RUnlock()
|
||||
|
||||
g, ok := m.groups[gID]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("can't find group with id %q", gID)
|
||||
}
|
||||
for _, rule := range g.Rules {
|
||||
if apiAlert := rule.AlertAPI(aID); apiAlert != nil {
|
||||
return apiAlert, nil
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("can't func alert with id %q in group %q", aID, g.Name)
|
||||
}
|
||||
|
||||
func (m *manager) start(ctx context.Context, path []string, validate bool) error {
|
||||
return m.update(ctx, path, validate, true)
|
||||
}
|
||||
|
||||
func (m *manager) close() {
|
||||
if m.rw != nil {
|
||||
err := m.rw.Close()
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot stop the remotewrite: %s", err)
|
||||
}
|
||||
}
|
||||
m.wg.Wait()
|
||||
}
|
||||
|
||||
func (m *manager) startGroup(ctx context.Context, group Group, restore bool) {
|
||||
if restore && m.rr != nil {
|
||||
err := group.Restore(ctx, m.rr, *remoteReadLookBack)
|
||||
if err != nil {
|
||||
logger.Errorf("error while restoring state for group %q: %s", group.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
m.wg.Add(1)
|
||||
id := group.ID()
|
||||
go func() {
|
||||
group.start(ctx, *evaluationInterval, m.storage, m.notifier, m.rw)
|
||||
m.wg.Done()
|
||||
}()
|
||||
m.groups[id] = &group
|
||||
}
|
||||
|
||||
func (m *manager) update(ctx context.Context, path []string, validate, restore bool) error {
|
||||
logger.Infof("reading alert rules configuration file from %q", strings.Join(path, ";"))
|
||||
newGroups, err := Parse(path, validate)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse configuration file: %s", err)
|
||||
}
|
||||
|
||||
groupsRegistry := make(map[uint64]Group)
|
||||
for _, ng := range newGroups {
|
||||
groupsRegistry[ng.ID()] = ng
|
||||
}
|
||||
|
||||
m.groupsMu.Lock()
|
||||
for _, og := range m.groups {
|
||||
ng, ok := groupsRegistry[og.ID()]
|
||||
if !ok {
|
||||
// old group is not present in new list
|
||||
// and must be stopped and deleted
|
||||
og.close()
|
||||
delete(m.groups, og.ID())
|
||||
og = nil
|
||||
continue
|
||||
}
|
||||
og.updateCh <- ng
|
||||
delete(groupsRegistry, ng.ID())
|
||||
}
|
||||
|
||||
for _, ng := range groupsRegistry {
|
||||
m.startGroup(ctx, ng, restore)
|
||||
}
|
||||
m.groupsMu.Unlock()
|
||||
return nil
|
||||
}
|
||||
163
app/vmalert/manager_test.go
Normal file
163
app/vmalert/manager_test.go
Normal file
@@ -0,0 +1,163 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"math/rand"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestManagerUpdateError(t *testing.T) {
|
||||
m := &manager{groups: make(map[uint64]*Group)}
|
||||
path := []string{"foo/bar"}
|
||||
err := m.update(context.Background(), path, true, false)
|
||||
if err == nil {
|
||||
t.Fatalf("expected to have err; got nil instead")
|
||||
}
|
||||
expErr := "no groups found"
|
||||
if !strings.Contains(err.Error(), expErr) {
|
||||
t.Fatalf("expected to got err %s; got %s", expErr, err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestManagerUpdateConcurrent supposed to test concurrent
|
||||
// execution of configuration update.
|
||||
// Should be executed with -race flag
|
||||
func TestManagerUpdateConcurrent(t *testing.T) {
|
||||
m := &manager{
|
||||
groups: make(map[uint64]*Group),
|
||||
storage: &fakeQuerier{},
|
||||
notifier: &fakeNotifier{},
|
||||
}
|
||||
paths := []string{
|
||||
"testdata/dir/rules0-good.rules",
|
||||
"testdata/dir/rules1-good.rules",
|
||||
"testdata/rules0-good.rules",
|
||||
}
|
||||
*evaluationInterval = time.Millisecond
|
||||
if err := m.start(context.Background(), []string{paths[0]}, true); err != nil {
|
||||
t.Fatalf("failed to start: %s", err)
|
||||
}
|
||||
|
||||
const workers = 500
|
||||
const iterations = 10
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(workers)
|
||||
for i := 0; i < workers; i++ {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for i := 0; i < iterations; i++ {
|
||||
rnd := rand.Intn(len(paths))
|
||||
path := []string{paths[rnd]}
|
||||
err := m.update(context.Background(), path, true, false)
|
||||
if err != nil {
|
||||
t.Errorf("update error: %s", err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
// TestManagerUpdate tests sequential configuration
|
||||
// updates.
|
||||
func TestManagerUpdate(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
initPath string
|
||||
updatePath string
|
||||
want []*Group
|
||||
}{
|
||||
{
|
||||
name: "update good rules",
|
||||
initPath: "testdata/rules0-good.rules",
|
||||
updatePath: "testdata/dir/rules1-good.rules",
|
||||
want: []*Group{
|
||||
{
|
||||
File: "testdata/dir/rules1-good.rules",
|
||||
Name: "duplicatedGroupDiffFiles",
|
||||
Rules: []*Rule{newTestRule("VMRows", time.Second*10)},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "update good rules from 1 to 2 groups",
|
||||
initPath: "testdata/dir/rules1-good.rules",
|
||||
updatePath: "testdata/rules0-good.rules",
|
||||
want: []*Group{
|
||||
{
|
||||
File: "testdata/rules0-good.rules",
|
||||
Name: "groupGorSingleAlert", Rules: []*Rule{
|
||||
newTestRule("VMRows", time.Second*10),
|
||||
}},
|
||||
{
|
||||
File: "testdata/rules0-good.rules",
|
||||
Name: "TestGroup", Rules: []*Rule{
|
||||
newTestRule("Conns", time.Duration(0)),
|
||||
newTestRule("ExampleAlertAlwaysFiring", time.Duration(0)),
|
||||
}},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "update with one bad rule file",
|
||||
initPath: "testdata/rules0-good.rules",
|
||||
updatePath: "testdata/dir/rules2-bad.rules",
|
||||
want: []*Group{
|
||||
{
|
||||
File: "testdata/rules0-good.rules",
|
||||
Name: "groupGorSingleAlert", Rules: []*Rule{
|
||||
newTestRule("VMRows", time.Second*10),
|
||||
}},
|
||||
{
|
||||
File: "testdata/rules0-good.rules",
|
||||
Name: "TestGroup", Rules: []*Rule{
|
||||
newTestRule("Conns", time.Duration(0)),
|
||||
newTestRule("ExampleAlertAlwaysFiring", time.Duration(0)),
|
||||
}},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.TODO())
|
||||
m := &manager{groups: make(map[uint64]*Group), storage: &fakeQuerier{}}
|
||||
path := []string{tc.initPath}
|
||||
if err := m.update(ctx, path, true, false); err != nil {
|
||||
t.Fatalf("failed to complete initial rules update: %s", err)
|
||||
}
|
||||
|
||||
path = []string{tc.updatePath}
|
||||
_ = m.update(ctx, path, true, false)
|
||||
if len(tc.want) != len(m.groups) {
|
||||
t.Fatalf("\nwant number of groups: %d;\ngot: %d ", len(tc.want), len(m.groups))
|
||||
}
|
||||
|
||||
for _, wantG := range tc.want {
|
||||
gotG, ok := m.groups[wantG.ID()]
|
||||
if !ok {
|
||||
t.Fatalf("expected to have group %q", wantG.Name)
|
||||
}
|
||||
compareGroups(t, gotG, wantG)
|
||||
}
|
||||
|
||||
cancel()
|
||||
m.close()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func compareGroups(t *testing.T, a, b *Group) {
|
||||
t.Helper()
|
||||
if len(a.Rules) != len(b.Rules) {
|
||||
t.Fatalf("expected group %s to have %d rules; got: %d",
|
||||
a.Name, len(a.Rules), len(b.Rules))
|
||||
}
|
||||
for i, r := range a.Rules {
|
||||
got, want := r, b.Rules[i]
|
||||
if got.Name != want.Name {
|
||||
t.Fatalf("expected to have rule %q; got %q", want.Name, got.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -12,12 +12,13 @@ import (
|
||||
// Alert the triggered alert
|
||||
// TODO: Looks like alert name isn't unique
|
||||
type Alert struct {
|
||||
Group string
|
||||
GroupID uint64
|
||||
Name string
|
||||
Labels map[string]string
|
||||
Annotations map[string]string
|
||||
State AlertState
|
||||
|
||||
Expr string
|
||||
Start time.Time
|
||||
End time.Time
|
||||
Value float64
|
||||
@@ -52,14 +53,15 @@ func (as AlertState) String() string {
|
||||
type alertTplData struct {
|
||||
Labels map[string]string
|
||||
Value float64
|
||||
Expr string
|
||||
}
|
||||
|
||||
const tplHeader = `{{ $value := .Value }}{{ $labels := .Labels }}`
|
||||
const tplHeader = `{{ $value := .Value }}{{ $labels := .Labels }}{{ $expr := .Expr }}`
|
||||
|
||||
// ExecTemplate executes the Alert template for give
|
||||
// map of annotations.
|
||||
func (a *Alert) ExecTemplate(annotations map[string]string) (map[string]string, error) {
|
||||
tplData := alertTplData{Value: a.Value, Labels: a.Labels}
|
||||
tplData := alertTplData{Value: a.Value, Labels: a.Labels, Expr: a.Expr}
|
||||
return templateAnnotations(annotations, tplHeader, tplData)
|
||||
}
|
||||
|
||||
|
||||
@@ -1,22 +1,27 @@
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/url"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestAlert_ExecTemplate(t *testing.T) {
|
||||
u, _ := url.Parse("https://victoriametrics.com/path")
|
||||
InitTemplateFunc(u)
|
||||
testCases := []struct {
|
||||
name string
|
||||
alert *Alert
|
||||
annotations map[string]string
|
||||
expTpl map[string]string
|
||||
}{
|
||||
{
|
||||
name: "empty-alert",
|
||||
alert: &Alert{},
|
||||
annotations: map[string]string{},
|
||||
expTpl: map[string]string{},
|
||||
},
|
||||
{
|
||||
name: "no-template",
|
||||
alert: &Alert{
|
||||
Value: 1e4,
|
||||
Labels: map[string]string{
|
||||
@@ -27,6 +32,7 @@ func TestAlert_ExecTemplate(t *testing.T) {
|
||||
expTpl: map[string]string{},
|
||||
},
|
||||
{
|
||||
name: "label-template",
|
||||
alert: &Alert{
|
||||
Value: 1e4,
|
||||
Labels: map[string]string{
|
||||
@@ -43,10 +49,24 @@ func TestAlert_ExecTemplate(t *testing.T) {
|
||||
"description": "It is 10000 connections for localhost",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "expression-template",
|
||||
alert: &Alert{
|
||||
Expr: `vm_rows{"label"="bar"}>0`,
|
||||
},
|
||||
annotations: map[string]string{
|
||||
"exprEscapedQuery": "{{ $expr|quotesEscape|queryEscape }}",
|
||||
"exprEscapedPath": "{{ $expr|quotesEscape|pathEscape }}",
|
||||
},
|
||||
expTpl: map[string]string{
|
||||
"exprEscapedQuery": "vm_rows%7B%5C%22label%5C%22%3D%5C%22bar%5C%22%7D%3E0",
|
||||
"exprEscapedPath": "vm_rows%7B%5C%22label%5C%22=%5C%22bar%5C%22%7D%3E0",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
tpl, err := tc.alert.ExecTemplate(tc.annotations)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
|
||||
@@ -2,6 +2,7 @@ package notifier
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
@@ -17,13 +18,21 @@ type AlertManager struct {
|
||||
}
|
||||
|
||||
// Send an alert or resolve message
|
||||
func (am *AlertManager) Send(alerts []Alert) error {
|
||||
func (am *AlertManager) Send(ctx context.Context, alerts []Alert) error {
|
||||
b := &bytes.Buffer{}
|
||||
writeamRequest(b, alerts, am.argFunc)
|
||||
resp, err := am.client.Post(am.alertURL, "application/json", b)
|
||||
|
||||
req, err := http.NewRequest("POST", am.alertURL, b)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req = req.WithContext(ctx)
|
||||
resp, err := am.client.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
@@ -37,7 +46,7 @@ func (am *AlertManager) Send(alerts []Alert) error {
|
||||
}
|
||||
|
||||
// AlertURLGenerator returns URL to single alert by given name
|
||||
type AlertURLGenerator func(group, id string) string
|
||||
type AlertURLGenerator func(group, alert string) string
|
||||
|
||||
const alertManagerPath = "/api/v2/alerts"
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
{% for i, alert := range alerts %}
|
||||
{
|
||||
"startsAt":{%q= alert.Start.Format(time.RFC3339Nano) %},
|
||||
"generatorURL": {%q= generatorURL(alert.Group, strconv.FormatUint(alert.ID, 10)) %},
|
||||
"generatorURL": {%q= generatorURL(strconv.FormatUint(alert.GroupID, 10), strconv.FormatUint(alert.ID, 10)) %},
|
||||
{% if !alert.End.IsZero() %}
|
||||
"endsAt":{%q= alert.End.Format(time.RFC3339Nano) %},
|
||||
{% endif %}
|
||||
|
||||
@@ -36,7 +36,7 @@ func streamamRequest(qw422016 *qt422016.Writer, alerts []Alert, generatorURL fun
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:11
|
||||
qw422016.N().S(`,"generatorURL":`)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:12
|
||||
qw422016.N().Q(generatorURL(alert.Group, strconv.FormatUint(alert.ID, 10)))
|
||||
qw422016.N().Q(generatorURL(strconv.FormatUint(alert.GroupID, 10), strconv.FormatUint(alert.ID, 10)))
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:12
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:13
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
@@ -40,8 +41,8 @@ func TestAlertManager_Send(t *testing.T) {
|
||||
if len(a) != 1 {
|
||||
t.Errorf("expected 1 alert in array got %d", len(a))
|
||||
}
|
||||
if a[0].GeneratorURL != "group0" {
|
||||
t.Errorf("exptected alert0 as generatorURL got %s", a[0].GeneratorURL)
|
||||
if a[0].GeneratorURL != "0/0" {
|
||||
t.Errorf("exptected 0/0 as generatorURL got %s", a[0].GeneratorURL)
|
||||
}
|
||||
if a[0].Labels["alertname"] != "alert0" {
|
||||
t.Errorf("exptected alert0 as alert name got %s", a[0].Labels["alertname"])
|
||||
@@ -57,16 +58,16 @@ func TestAlertManager_Send(t *testing.T) {
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
am := NewAlertManager(srv.URL, func(group, name string) string {
|
||||
return group + name
|
||||
return group + "/" + name
|
||||
}, srv.Client())
|
||||
if err := am.Send([]Alert{{}, {}}); err == nil {
|
||||
if err := am.Send(context.Background(), []Alert{{}, {}}); err == nil {
|
||||
t.Error("expected connection error got nil")
|
||||
}
|
||||
if err := am.Send([]Alert{}); err == nil {
|
||||
if err := am.Send(context.Background(), []Alert{}); err == nil {
|
||||
t.Error("expected wrong http code error got nil")
|
||||
}
|
||||
if err := am.Send([]Alert{{
|
||||
Group: "group",
|
||||
if err := am.Send(context.Background(), []Alert{{
|
||||
GroupID: 0,
|
||||
Name: "alert0",
|
||||
Start: time.Now().UTC(),
|
||||
End: time.Now().UTC(),
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
package notifier
|
||||
|
||||
import "context"
|
||||
|
||||
// Notifier is common interface for alert manager provider
|
||||
type Notifier interface {
|
||||
Send(alerts []Alert) error
|
||||
Send(ctx context.Context, alerts []Alert) error
|
||||
}
|
||||
|
||||
@@ -142,6 +142,15 @@ func InitTemplateFunc(externalURL *url.URL) {
|
||||
"externalURL": func() string {
|
||||
return externalURL.String()
|
||||
},
|
||||
"pathEscape": func(u string) string {
|
||||
return url.PathEscape(u)
|
||||
},
|
||||
"queryEscape": func(q string) string {
|
||||
return url.QueryEscape(q)
|
||||
},
|
||||
"quotesEscape": func(q string) string {
|
||||
return strings.Replace(q, `"`, `\"`, -1)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -103,7 +103,8 @@ func (c *Client) Push(s prompbmarshal.TimeSeries) error {
|
||||
case c.input <- s:
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("failed to push timeseries - queue is full (%d entries)",
|
||||
return fmt.Errorf("failed to push timeseries - queue is full (%d entries). "+
|
||||
"Queue size is controlled by -remoteWrite.maxQueueSize flag",
|
||||
c.maxQueueSize)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,25 +17,6 @@ import (
|
||||
"github.com/VictoriaMetrics/metricsql"
|
||||
)
|
||||
|
||||
// Group grouping array of alert
|
||||
type Group struct {
|
||||
Name string
|
||||
Rules []*Rule
|
||||
}
|
||||
|
||||
// Restore restores alerts state for all group rules with For > 0
|
||||
func (g *Group) Restore(ctx context.Context, q datasource.Querier, lookback time.Duration) error {
|
||||
for _, rule := range g.Rules {
|
||||
if rule.For == 0 {
|
||||
return nil
|
||||
}
|
||||
if err := rule.Restore(ctx, q, lookback); err != nil {
|
||||
return fmt.Errorf("error while restoring rule %q: %s", rule.Name, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Rule is basic alert entity
|
||||
type Rule struct {
|
||||
Name string `yaml:"alert"`
|
||||
@@ -58,6 +39,10 @@ type Rule struct {
|
||||
lastExecError error
|
||||
}
|
||||
|
||||
func (r *Rule) id() string {
|
||||
return r.Name
|
||||
}
|
||||
|
||||
// Validate validates rule
|
||||
func (r *Rule) Validate() error {
|
||||
if r.Name == "" {
|
||||
@@ -86,7 +71,7 @@ func (r *Rule) Exec(ctx context.Context, q datasource.Querier) error {
|
||||
}
|
||||
|
||||
for h, a := range r.alerts {
|
||||
// cleanup inactive alerts from previous Eval
|
||||
// cleanup inactive alerts from previous Exec
|
||||
if a.State == notifier.StateInactive {
|
||||
delete(r.alerts, h)
|
||||
}
|
||||
@@ -98,8 +83,16 @@ func (r *Rule) Exec(ctx context.Context, q datasource.Querier) error {
|
||||
h := hash(m)
|
||||
updated[h] = struct{}{}
|
||||
if a, ok := r.alerts[h]; ok {
|
||||
// update Value field with latest value
|
||||
a.Value = m.Value
|
||||
if a.Value != m.Value {
|
||||
// update Value field with latest value
|
||||
a.Value = m.Value
|
||||
// and re-exec template since Value can be used
|
||||
// in templates
|
||||
err = r.template(a)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
a, err := r.newAlert(m)
|
||||
@@ -116,19 +109,19 @@ func (r *Rule) Exec(ctx context.Context, q datasource.Querier) error {
|
||||
// if alert wasn't updated in this iteration
|
||||
// means it is resolved already
|
||||
if _, ok := updated[h]; !ok {
|
||||
if a.State == notifier.StatePending {
|
||||
// alert was in Pending state - it is not
|
||||
// active anymore
|
||||
delete(r.alerts, h)
|
||||
continue
|
||||
}
|
||||
a.State = notifier.StateInactive
|
||||
// set endTime to last execution time
|
||||
// so it can be sent by notifier on next step
|
||||
a.End = r.lastExecTime
|
||||
continue
|
||||
}
|
||||
if a.State == notifier.StatePending && time.Since(a.Start) >= r.For {
|
||||
a.State = notifier.StateFiring
|
||||
alertsFired.Inc()
|
||||
}
|
||||
if a.State == notifier.StateFiring {
|
||||
a.End = r.lastExecTime.Add(3 * *evaluationInterval)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -154,15 +147,14 @@ func hash(m datasource.Metric) uint64 {
|
||||
|
||||
func (r *Rule) newAlert(m datasource.Metric) (*notifier.Alert, error) {
|
||||
a := ¬ifier.Alert{
|
||||
Group: r.group.Name,
|
||||
Name: r.Name,
|
||||
Labels: map[string]string{},
|
||||
Value: m.Value,
|
||||
Start: time.Now(),
|
||||
GroupID: r.group.ID(),
|
||||
Name: r.Name,
|
||||
Labels: map[string]string{},
|
||||
Value: m.Value,
|
||||
Start: time.Now(),
|
||||
Expr: r.Expr,
|
||||
// TODO: support End time
|
||||
}
|
||||
|
||||
// 1. use data labels
|
||||
for _, l := range m.Labels {
|
||||
// drop __name__ to be consistent with Prometheus alerting
|
||||
if l.Name == "__name__" {
|
||||
@@ -170,28 +162,31 @@ func (r *Rule) newAlert(m datasource.Metric) (*notifier.Alert, error) {
|
||||
}
|
||||
a.Labels[l.Name] = l.Value
|
||||
}
|
||||
return a, r.template(a)
|
||||
}
|
||||
|
||||
// 2. template rule labels with data labels
|
||||
func (r *Rule) template(a *notifier.Alert) error {
|
||||
// 1. template rule labels with data labels
|
||||
rLabels, err := a.ExecTemplate(r.Labels)
|
||||
if err != nil {
|
||||
return a, err
|
||||
return err
|
||||
}
|
||||
|
||||
// 3. merge data labels and rule labels
|
||||
// 2. merge data labels and rule labels
|
||||
// metric labels may be overridden by
|
||||
// rule labels
|
||||
for k, v := range rLabels {
|
||||
a.Labels[k] = v
|
||||
}
|
||||
|
||||
// 4. template merged labels
|
||||
// 3. template merged labels
|
||||
a.Labels, err = a.ExecTemplate(a.Labels)
|
||||
if err != nil {
|
||||
return a, err
|
||||
return err
|
||||
}
|
||||
|
||||
a.Annotations, err = a.ExecTemplate(r.Annotations)
|
||||
return a, err
|
||||
return err
|
||||
}
|
||||
|
||||
// AlertAPI generates APIAlert object from alert by its id(hash)
|
||||
@@ -218,10 +213,11 @@ func (r *Rule) AlertsAPI() []*APIAlert {
|
||||
|
||||
func (r *Rule) newAlertAPI(a notifier.Alert) *APIAlert {
|
||||
return &APIAlert{
|
||||
// encode as string to avoid rounding
|
||||
ID: fmt.Sprintf("%d", a.ID),
|
||||
// encode as strings to avoid rounding
|
||||
ID: fmt.Sprintf("%d", a.ID),
|
||||
GroupID: fmt.Sprintf("%d", a.GroupID),
|
||||
|
||||
Name: a.Name,
|
||||
Group: a.Group,
|
||||
Expression: r.Expr,
|
||||
Labels: a.Labels,
|
||||
Annotations: a.Annotations,
|
||||
@@ -298,8 +294,12 @@ func newTimeSeries(value float64, labels map[string]string, timestamp time.Time)
|
||||
|
||||
// Restore restores the state of active alerts basing on previously written timeseries.
|
||||
// Restore restores only Start field. Field State will be always Pending and supposed
|
||||
// to be updated on next Eval, as well as Value field.
|
||||
// to be updated on next Exec, as well as Value field.
|
||||
func (r *Rule) Restore(ctx context.Context, q datasource.Querier, lookback time.Duration) error {
|
||||
if q == nil {
|
||||
return fmt.Errorf("querier is nil")
|
||||
}
|
||||
|
||||
// Get the last datapoint in range via MetricsQL `last_over_time`.
|
||||
// We don't use plain PromQL since Prometheus doesn't support
|
||||
// remote write protocol which is used for state persistence in vmalert.
|
||||
@@ -314,7 +314,7 @@ func (r *Rule) Restore(ctx context.Context, q datasource.Querier, lookback time.
|
||||
labels := m.Labels
|
||||
m.Labels = make([]datasource.Label, 0)
|
||||
// drop all extra labels, so hash key will
|
||||
// be identical to timeseries received in Eval
|
||||
// be identical to timeseries received in Exec
|
||||
for _, l := range labels {
|
||||
if l.Name == alertNameLabel {
|
||||
continue
|
||||
@@ -334,7 +334,7 @@ func (r *Rule) Restore(ctx context.Context, q datasource.Querier, lookback time.
|
||||
a.State = notifier.StatePending
|
||||
a.Start = time.Unix(int64(m.Value), 0)
|
||||
r.alerts[a.ID] = a
|
||||
logger.Infof("alert %q.%q restored to state at %v", a.Group, a.Name, a.Start)
|
||||
logger.Infof("alert %q(%d) restored to state at %v", a.Name, a.ID, a.Start)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -295,16 +296,14 @@ func TestRule_Exec(t *testing.T) {
|
||||
},
|
||||
},
|
||||
{
|
||||
newTestRule("for-pending=>inactive", time.Millisecond),
|
||||
newTestRule("for-pending=>empty", time.Second),
|
||||
[][]datasource.Metric{
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
{metricWithLabels(t, "name", "foo")},
|
||||
// empty step to reset pending alerts
|
||||
// empty step to reset and delete pending alerts
|
||||
{},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateInactive},
|
||||
},
|
||||
map[uint64]*notifier.Alert{},
|
||||
},
|
||||
{
|
||||
newTestRule("for-pending=>firing=>inactive", time.Millisecond),
|
||||
@@ -392,19 +391,28 @@ func metricWithLabels(t *testing.T, labels ...string) datasource.Metric {
|
||||
}
|
||||
|
||||
type fakeQuerier struct {
|
||||
sync.Mutex
|
||||
metrics []datasource.Metric
|
||||
}
|
||||
|
||||
func (fq *fakeQuerier) reset() {
|
||||
fq.Lock()
|
||||
fq.metrics = fq.metrics[:0]
|
||||
fq.Unlock()
|
||||
}
|
||||
|
||||
func (fq *fakeQuerier) add(metrics ...datasource.Metric) {
|
||||
fq.Lock()
|
||||
fq.metrics = append(fq.metrics, metrics...)
|
||||
fq.Unlock()
|
||||
}
|
||||
|
||||
func (fq fakeQuerier) Query(ctx context.Context, query string) ([]datasource.Metric, error) {
|
||||
return fq.metrics, nil
|
||||
func (fq *fakeQuerier) Query(_ context.Context, _ string) ([]datasource.Metric, error) {
|
||||
fq.Lock()
|
||||
cpy := make([]datasource.Metric, len(fq.metrics))
|
||||
copy(cpy, fq.metrics)
|
||||
fq.Unlock()
|
||||
return cpy, nil
|
||||
}
|
||||
|
||||
func TestRule_Restore(t *testing.T) {
|
||||
|
||||
1
app/vmalert/testdata/dir/rules0-good.rules
vendored
1
app/vmalert/testdata/dir/rules0-good.rules
vendored
@@ -6,6 +6,7 @@ groups:
|
||||
expr: vm_rows > 0
|
||||
labels:
|
||||
label: bar
|
||||
expr: "{{ $expr|queryEscape }}"
|
||||
annotations:
|
||||
summary: "{{ $value|humanize }}"
|
||||
description: "{{$labels}}"
|
||||
|
||||
11
app/vmalert/testdata/rules1-good.rules
vendored
Normal file
11
app/vmalert/testdata/rules1-good.rules
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
groups:
|
||||
- name: groupTest
|
||||
rules:
|
||||
- alert: VMRows
|
||||
for: 1ms
|
||||
expr: vm_rows > 0
|
||||
labels:
|
||||
label: bar
|
||||
host: "{{ $labels.instance }}"
|
||||
annotations:
|
||||
summary: "{{ $value }}"
|
||||
@@ -10,13 +10,16 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
)
|
||||
|
||||
// APIAlert has info for an alert.
|
||||
// APIAlert represents an notifier.Alert state
|
||||
// for WEB view
|
||||
type APIAlert struct {
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Group string `json:"group"`
|
||||
GroupID string `json:"group_id"`
|
||||
Expression string `json:"expression"`
|
||||
State string `json:"state"`
|
||||
Value string `json:"value"`
|
||||
@@ -26,14 +29,15 @@ type APIAlert struct {
|
||||
}
|
||||
|
||||
type requestHandler struct {
|
||||
groups []Group
|
||||
m *manager
|
||||
}
|
||||
|
||||
var pathList = [][]string{
|
||||
{"/api/v1/alerts", "list all active alerts"},
|
||||
{"/api/v1/groupName/alertID/status", "get alert status by ID"},
|
||||
{"/api/v1/groupID/alertID/status", "get alert status by ID"},
|
||||
// /metrics is served by httpserver by default
|
||||
{"/metrics", "list of application metrics"},
|
||||
{"/-/reload", "reload configuration"},
|
||||
}
|
||||
|
||||
func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
@@ -48,6 +52,11 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
case "/api/v1/alerts":
|
||||
resph.handle(rh.list())
|
||||
return true
|
||||
case "/-/reload":
|
||||
logger.Infof("api config reload was called, sending sighup")
|
||||
procutil.SelfSIGHUP()
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return true
|
||||
default:
|
||||
// /api/v1/<groupName>/<alertID>/status
|
||||
if strings.HasSuffix(r.URL.Path, "/status") {
|
||||
@@ -66,8 +75,10 @@ type listAlertsResponse struct {
|
||||
}
|
||||
|
||||
func (rh *requestHandler) list() ([]byte, error) {
|
||||
rh.m.groupsMu.RLock()
|
||||
defer rh.m.groupsMu.RUnlock()
|
||||
lr := listAlertsResponse{Status: "success"}
|
||||
for _, g := range rh.groups {
|
||||
for _, g := range rh.m.groups {
|
||||
for _, r := range g.Rules {
|
||||
lr.Data.Alerts = append(lr.Data.Alerts, r.AlertsAPI()...)
|
||||
}
|
||||
@@ -89,6 +100,9 @@ func (rh *requestHandler) list() ([]byte, error) {
|
||||
}
|
||||
|
||||
func (rh *requestHandler) alert(path string) ([]byte, error) {
|
||||
rh.m.groupsMu.RLock()
|
||||
defer rh.m.groupsMu.RUnlock()
|
||||
|
||||
parts := strings.SplitN(strings.TrimPrefix(path, "/api/v1/"), "/", 3)
|
||||
if len(parts) != 3 {
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
@@ -96,29 +110,20 @@ func (rh *requestHandler) alert(path string) ([]byte, error) {
|
||||
StatusCode: http.StatusBadRequest,
|
||||
}
|
||||
}
|
||||
group := strings.TrimRight(parts[0], "/")
|
||||
idStr := strings.TrimRight(parts[1], "/")
|
||||
id, err := strconv.ParseUint(idStr, 10, 0)
|
||||
|
||||
groupID, err := uint64FromPath(parts[0])
|
||||
if err != nil {
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf(`cannot parse int from %q`, idStr),
|
||||
StatusCode: http.StatusBadRequest,
|
||||
}
|
||||
return nil, badRequest(fmt.Errorf(`cannot parse groupID: %s`, err))
|
||||
}
|
||||
for _, g := range rh.groups {
|
||||
if g.Name != group {
|
||||
continue
|
||||
}
|
||||
for _, rule := range g.Rules {
|
||||
if apiAlert := rule.AlertAPI(id); apiAlert != nil {
|
||||
return json.Marshal(apiAlert)
|
||||
}
|
||||
}
|
||||
alertID, err := uint64FromPath(parts[1])
|
||||
if err != nil {
|
||||
return nil, badRequest(fmt.Errorf(`cannot parse alertID: %s`, err))
|
||||
}
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf(`cannot find alert %s in %q`, idStr, group),
|
||||
StatusCode: http.StatusNotFound,
|
||||
resp, err := rh.m.AlertAPI(groupID, alertID)
|
||||
if err != nil {
|
||||
return nil, errResponse(err, http.StatusNotFound)
|
||||
}
|
||||
return json.Marshal(resp)
|
||||
}
|
||||
|
||||
// responseHandler wrapper on http.ResponseWriter with sugar
|
||||
@@ -132,3 +137,19 @@ func (w responseHandler) handle(b []byte, err error) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write(b)
|
||||
}
|
||||
|
||||
func uint64FromPath(path string) (uint64, error) {
|
||||
s := strings.TrimRight(path, "/")
|
||||
return strconv.ParseUint(s, 10, 0)
|
||||
}
|
||||
|
||||
func badRequest(err error) *httpserver.ErrorWithStatusCode {
|
||||
return errResponse(err, http.StatusBadRequest)
|
||||
}
|
||||
|
||||
func errResponse(err error, sc int) *httpserver.ErrorWithStatusCode {
|
||||
return &httpserver.ErrorWithStatusCode{
|
||||
Err: err,
|
||||
StatusCode: sc,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,12 +17,14 @@ func TestHandler(t *testing.T) {
|
||||
0: {},
|
||||
},
|
||||
}
|
||||
rh := &requestHandler{
|
||||
groups: []Group{{
|
||||
Name: "group",
|
||||
Rules: []*Rule{rule},
|
||||
}},
|
||||
g := &Group{
|
||||
Name: "group",
|
||||
Rules: []*Rule{rule},
|
||||
}
|
||||
m := &manager{groups: make(map[uint64]*Group)}
|
||||
m.groups[0] = g
|
||||
rh := &requestHandler{m: m}
|
||||
|
||||
getResp := func(url string, to interface{}, code int) {
|
||||
t.Helper()
|
||||
resp, err := http.Get(url)
|
||||
@@ -52,19 +54,19 @@ func TestHandler(t *testing.T) {
|
||||
t.Errorf("expected 1 alert got %d", length)
|
||||
}
|
||||
})
|
||||
t.Run("/api/v1/group/0/status", func(t *testing.T) {
|
||||
t.Run("/api/v1/0/0/status", func(t *testing.T) {
|
||||
alert := &APIAlert{}
|
||||
getResp(ts.URL+"/api/v1/group/0/status", alert, 200)
|
||||
getResp(ts.URL+"/api/v1/0/0/status", alert, 200)
|
||||
expAlert := rule.newAlertAPI(*rule.alerts[0])
|
||||
if !reflect.DeepEqual(alert, expAlert) {
|
||||
t.Errorf("expected %v is equal to %v", alert, expAlert)
|
||||
}
|
||||
})
|
||||
t.Run("/api/v1/group/1/status", func(t *testing.T) {
|
||||
getResp(ts.URL+"/api/v1/group/1/status", nil, 404)
|
||||
t.Run("/api/v1/0/1/status", func(t *testing.T) {
|
||||
getResp(ts.URL+"/api/v1/0/1/status", nil, 404)
|
||||
})
|
||||
t.Run("/api/v1/unknown-group/0/status", func(t *testing.T) {
|
||||
getResp(ts.URL+"/api/v1/unknown-group/0/status", nil, 404)
|
||||
t.Run("/api/v1/1/0/status", func(t *testing.T) {
|
||||
getResp(ts.URL+"/api/v1/1/0/status", nil, 404)
|
||||
})
|
||||
t.Run("/", func(t *testing.T) {
|
||||
getResp(ts.URL, nil, 200)
|
||||
|
||||
@@ -19,7 +19,7 @@ The port can be modified via `-httpListenAddr` command-line flag.
|
||||
|
||||
The auth config can be reloaded by passing `SIGHUP` signal to `vmauth`.
|
||||
|
||||
Docker images for `vmauth` are available at [https://hub.docker.com/r/victoriametrics/vmauth/tags].
|
||||
Docker images for `vmauth` are available [here](https://hub.docker.com/r/victoriametrics/vmauth/tags).
|
||||
|
||||
Pass `-help` to `vmauth` in order to see all the supported command-line flags with their descriptions.
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"net/http"
|
||||
"net/http/httputil"
|
||||
"net/url"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
@@ -19,6 +20,8 @@ var (
|
||||
)
|
||||
|
||||
func main() {
|
||||
// Write flags and help message to stdout, since it is easier to grep or pipe.
|
||||
flag.CommandLine.SetOutput(os.Stdout)
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
|
||||
@@ -19,6 +19,9 @@ Backed up data can be restored with [vmrestore](https://github.com/VictoriaMetri
|
||||
|
||||
See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
|
||||
|
||||
See also [vmbackuper](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/466) tool built on top of `vmbackup`. This tool simplifies
|
||||
creation of hourly, daily, weekly and monthly backups.
|
||||
|
||||
|
||||
### Use cases
|
||||
|
||||
|
||||
@@ -31,6 +31,8 @@ var (
|
||||
)
|
||||
|
||||
func main() {
|
||||
// Write flags and help message to stdout, since it is easier to grep or pipe.
|
||||
flag.CommandLine.SetOutput(os.Stdout)
|
||||
flag.Usage = usage
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
|
||||
@@ -21,7 +21,8 @@ vmrestore -src=gcs://<bucket>/<path/to/backup> -storageDataPath=<local/path/to/r
|
||||
* `<local/path/to/restore>` is the path to folder where data will be restored. This folder must be passed
|
||||
to VictoriaMetrics in `-storageDataPath` command-line flag after the restore process is complete.
|
||||
|
||||
The original `-storageDataPath` directory may contain old files. They will be susbstituted by the files from backup.
|
||||
The original `-storageDataPath` directory may contain old files. They will be susbstituted by the files from backup,
|
||||
i.e. the end result would be similar to [rsync --delete](https://askubuntu.com/questions/476041/how-do-i-make-rsync-delete-files-that-have-been-deleted-from-the-source-folder).
|
||||
|
||||
|
||||
### Troubleshooting
|
||||
@@ -52,7 +53,7 @@ Run `vmrestore -help` in order to see all the available options:
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files
|
||||
Whether to use pread() instead of mmap() for reading data files. By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot data files bigger than 2^32 bytes in memory
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerLevel string
|
||||
@@ -68,8 +69,8 @@ Run `vmrestore -help` in order to see all the available options:
|
||||
-src string
|
||||
Source path with backup on the remote storage. Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
|
||||
-storageDataPath string
|
||||
Destination path where backup must be restored. VictoriaMetrics must be stopped when restoring from backup. -storageDataPath dir can be non-empty. In this case only missing data is downloaded from backup (default "victoria-metrics-data")
|
||||
-version
|
||||
Destination path where backup must be restored. VictoriaMetrics must be stopped when restoring from backup. -storageDataPath dir can be non-empty. In this case the contents of -storageDataPath dir is synchronized with -src contents, i.e. it works like 'rsync --delete' (default "victoria-metrics-data")
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
```
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/actions"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/common"
|
||||
@@ -16,13 +17,16 @@ var (
|
||||
src = flag.String("src", "", "Source path with backup on the remote storage. "+
|
||||
"Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir")
|
||||
storageDataPath = flag.String("storageDataPath", "victoria-metrics-data", "Destination path where backup must be restored. "+
|
||||
"VictoriaMetrics must be stopped when restoring from backup. -storageDataPath dir can be non-empty. In this case only missing data is downloaded from backup")
|
||||
"VictoriaMetrics must be stopped when restoring from backup. -storageDataPath dir can be non-empty. In this case the contents of -storageDataPath dir "+
|
||||
"is synchronized with -src contents, i.e. it works like 'rsync --delete'")
|
||||
concurrency = flag.Int("concurrency", 10, "The number of concurrent workers. Higher concurrency may reduce restore duration")
|
||||
maxBytesPerSecond = flag.Int("maxBytesPerSecond", 0, "The maximum download speed. There is no limit if it is set to 0")
|
||||
skipBackupCompleteCheck = flag.Bool("skipBackupCompleteCheck", false, "Whether to skip checking for 'backup complete' file in -src. This may be useful for restoring from old backups, which were created without 'backup complete' file")
|
||||
)
|
||||
|
||||
func main() {
|
||||
// Write flags and help message to stdout, since it is easier to grep or pipe.
|
||||
flag.CommandLine.SetOutput(os.Stdout)
|
||||
flag.Usage = usage
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -395,14 +396,14 @@ func TSDBStatusHandler(startTime time.Time, w http.ResponseWriter, r *http.Reque
|
||||
if err := r.ParseForm(); err != nil {
|
||||
return fmt.Errorf("cannot parse form values: %s", err)
|
||||
}
|
||||
date := time.Now().Unix() / secsPerDay
|
||||
date := fasttime.UnixDate()
|
||||
dateStr := r.FormValue("date")
|
||||
if len(dateStr) > 0 {
|
||||
t, err := time.Parse("2006-01-02", dateStr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse `date` arg %q: %s", dateStr, err)
|
||||
}
|
||||
date = t.Unix() / secsPerDay
|
||||
date = uint64(t.Unix()) / secsPerDay
|
||||
}
|
||||
topN := 10
|
||||
topNStr := r.FormValue("topN")
|
||||
@@ -419,7 +420,7 @@ func TSDBStatusHandler(startTime time.Time, w http.ResponseWriter, r *http.Reque
|
||||
}
|
||||
topN = n
|
||||
}
|
||||
status, err := netstorage.GetTSDBStatusForDate(deadline, uint64(date), topN)
|
||||
status, err := netstorage.GetTSDBStatusForDate(deadline, date, topN)
|
||||
if err != nil {
|
||||
return fmt.Errorf(`cannot obtain tsdb status for date=%d, topN=%d: %s`, date, topN, err)
|
||||
}
|
||||
@@ -992,7 +993,7 @@ func getBool(r *http.Request, argKey string) bool {
|
||||
}
|
||||
|
||||
func currentTime() int64 {
|
||||
return int64(time.Now().UTC().Unix()) * 1e3
|
||||
return int64(fasttime.UnixTimestamp() * 1000)
|
||||
}
|
||||
|
||||
func getTagFilterssFromMatches(matches []string) ([][]storage.TagFilter, error) {
|
||||
|
||||
@@ -43,6 +43,8 @@ var aggrFuncs = map[string]aggrFunc{
|
||||
"bottomk_max": newAggrFuncRangeTopK(maxValue, true),
|
||||
"bottomk_avg": newAggrFuncRangeTopK(avgValue, true),
|
||||
"bottomk_median": newAggrFuncRangeTopK(medianValue, true),
|
||||
"any": newAggrFunc(aggrFuncAny),
|
||||
"outliersk": aggrFuncOutliersK,
|
||||
}
|
||||
|
||||
type aggrFunc func(afa *aggrFuncArg) ([]*timeseries, error)
|
||||
@@ -64,7 +66,7 @@ func newAggrFunc(afe func(tss []*timeseries) []*timeseries) aggrFunc {
|
||||
if err := expectTransformArgsNum(args, 1); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return aggrFuncExt(afe, args[0], &afa.ae.Modifier, false)
|
||||
return aggrFuncExt(afe, args[0], &afa.ae.Modifier, afa.ae.Limit, false)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,7 +82,7 @@ func removeGroupTags(metricName *storage.MetricName, modifier *metricsql.Modifie
|
||||
}
|
||||
}
|
||||
|
||||
func aggrFuncExt(afe func(tss []*timeseries) []*timeseries, argOrig []*timeseries, modifier *metricsql.ModifierExpr, keepOriginal bool) ([]*timeseries, error) {
|
||||
func aggrFuncExt(afe func(tss []*timeseries) []*timeseries, argOrig []*timeseries, modifier *metricsql.ModifierExpr, maxSeries int, keepOriginal bool) ([]*timeseries, error) {
|
||||
arg := copyTimeseriesMetricNames(argOrig)
|
||||
|
||||
// Perform grouping.
|
||||
@@ -92,7 +94,13 @@ func aggrFuncExt(afe func(tss []*timeseries) []*timeseries, argOrig []*timeserie
|
||||
if keepOriginal {
|
||||
ts = argOrig[i]
|
||||
}
|
||||
m[string(bb.B)] = append(m[string(bb.B)], ts)
|
||||
tss := m[string(bb.B)]
|
||||
if tss == nil && maxSeries > 0 && len(m) >= maxSeries {
|
||||
// We already reached time series limit after grouping. Skip other time series.
|
||||
continue
|
||||
}
|
||||
tss = append(tss, ts)
|
||||
m[string(bb.B)] = tss
|
||||
}
|
||||
bbPool.Put(bb)
|
||||
|
||||
@@ -112,6 +120,10 @@ func aggrFuncExt(afe func(tss []*timeseries) []*timeseries, argOrig []*timeserie
|
||||
return rvs, nil
|
||||
}
|
||||
|
||||
func aggrFuncAny(tss []*timeseries) []*timeseries {
|
||||
return tss[:1]
|
||||
}
|
||||
|
||||
func aggrFuncSum(tss []*timeseries) []*timeseries {
|
||||
if len(tss) == 1 {
|
||||
// Fast path - nothing to sum.
|
||||
@@ -441,7 +453,7 @@ func aggrFuncCountValues(afa *aggrFuncArg) ([]*timeseries, error) {
|
||||
}
|
||||
return rvs
|
||||
}
|
||||
return aggrFuncExt(afe, args[1], &afa.ae.Modifier, false)
|
||||
return aggrFuncExt(afe, args[1], &afa.ae.Modifier, afa.ae.Limit, false)
|
||||
}
|
||||
|
||||
func newAggrFuncTopK(isReverse bool) aggrFunc {
|
||||
@@ -468,15 +480,10 @@ func newAggrFuncTopK(isReverse bool) aggrFunc {
|
||||
}
|
||||
return removeNaNs(tss)
|
||||
}
|
||||
return aggrFuncExt(afe, args[1], &afa.ae.Modifier, true)
|
||||
return aggrFuncExt(afe, args[1], &afa.ae.Modifier, afa.ae.Limit, true)
|
||||
}
|
||||
}
|
||||
|
||||
type tsWithValue struct {
|
||||
ts *timeseries
|
||||
value float64
|
||||
}
|
||||
|
||||
func newAggrFuncRangeTopK(f func(values []float64) float64, isReverse bool) aggrFunc {
|
||||
return func(afa *aggrFuncArg) ([]*timeseries, error) {
|
||||
args := afa.args
|
||||
@@ -488,34 +495,42 @@ func newAggrFuncRangeTopK(f func(values []float64) float64, isReverse bool) aggr
|
||||
return nil, err
|
||||
}
|
||||
afe := func(tss []*timeseries) []*timeseries {
|
||||
maxs := make([]tsWithValue, len(tss))
|
||||
for i, ts := range tss {
|
||||
value := f(ts.Values)
|
||||
maxs[i] = tsWithValue{
|
||||
ts: ts,
|
||||
value: value,
|
||||
}
|
||||
}
|
||||
sort.Slice(maxs, func(i, j int) bool {
|
||||
a := maxs[i].value
|
||||
b := maxs[j].value
|
||||
if isReverse {
|
||||
a, b = b, a
|
||||
}
|
||||
return lessWithNaNs(a, b)
|
||||
})
|
||||
for i := range maxs {
|
||||
tss[i] = maxs[i].ts
|
||||
}
|
||||
for i, k := range ks {
|
||||
fillNaNsAtIdx(i, k, tss)
|
||||
}
|
||||
return removeNaNs(tss)
|
||||
return getRangeTopKTimeseries(tss, ks, f, isReverse)
|
||||
}
|
||||
return aggrFuncExt(afe, args[1], &afa.ae.Modifier, true)
|
||||
return aggrFuncExt(afe, args[1], &afa.ae.Modifier, afa.ae.Limit, true)
|
||||
}
|
||||
}
|
||||
|
||||
func getRangeTopKTimeseries(tss []*timeseries, ks []float64, f func(values []float64) float64, isReverse bool) []*timeseries {
|
||||
type tsWithValue struct {
|
||||
ts *timeseries
|
||||
value float64
|
||||
}
|
||||
maxs := make([]tsWithValue, len(tss))
|
||||
for i, ts := range tss {
|
||||
value := f(ts.Values)
|
||||
maxs[i] = tsWithValue{
|
||||
ts: ts,
|
||||
value: value,
|
||||
}
|
||||
}
|
||||
sort.Slice(maxs, func(i, j int) bool {
|
||||
a := maxs[i].value
|
||||
b := maxs[j].value
|
||||
if isReverse {
|
||||
a, b = b, a
|
||||
}
|
||||
return lessWithNaNs(a, b)
|
||||
})
|
||||
for i := range maxs {
|
||||
tss[i] = maxs[i].ts
|
||||
}
|
||||
for i, k := range ks {
|
||||
fillNaNsAtIdx(i, k, tss)
|
||||
}
|
||||
return removeNaNs(tss)
|
||||
}
|
||||
|
||||
func fillNaNsAtIdx(idx int, k float64, tss []*timeseries) {
|
||||
if math.IsNaN(k) {
|
||||
k = 0
|
||||
@@ -577,16 +592,54 @@ func avgValue(values []float64) float64 {
|
||||
func medianValue(values []float64) float64 {
|
||||
h := histogram.GetFast()
|
||||
for _, v := range values {
|
||||
if math.IsNaN(v) {
|
||||
continue
|
||||
if !math.IsNaN(v) {
|
||||
h.Update(v)
|
||||
}
|
||||
h.Update(v)
|
||||
}
|
||||
value := h.Quantile(0.5)
|
||||
histogram.PutFast(h)
|
||||
return value
|
||||
}
|
||||
|
||||
func aggrFuncOutliersK(afa *aggrFuncArg) ([]*timeseries, error) {
|
||||
args := afa.args
|
||||
if err := expectTransformArgsNum(args, 2); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ks, err := getScalar(args[0], 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
afe := func(tss []*timeseries) []*timeseries {
|
||||
// Calculate medians for each point across tss.
|
||||
medians := make([]float64, len(ks))
|
||||
h := histogram.GetFast()
|
||||
for n := range ks {
|
||||
h.Reset()
|
||||
for j := range tss {
|
||||
v := tss[j].Values[n]
|
||||
if !math.IsNaN(v) {
|
||||
h.Update(v)
|
||||
}
|
||||
}
|
||||
medians[n] = h.Quantile(0.5)
|
||||
}
|
||||
histogram.PutFast(h)
|
||||
|
||||
// Return topK time series with the highest variance from median.
|
||||
f := func(values []float64) float64 {
|
||||
sum2 := float64(0)
|
||||
for n, v := range values {
|
||||
d := v - medians[n]
|
||||
sum2 += d * d
|
||||
}
|
||||
return sum2
|
||||
}
|
||||
return getRangeTopKTimeseries(tss, ks, f, false)
|
||||
}
|
||||
return aggrFuncExt(afe, args[1], &afa.ae.Modifier, afa.ae.Limit, true)
|
||||
}
|
||||
|
||||
func aggrFuncLimitK(afa *aggrFuncArg) ([]*timeseries, error) {
|
||||
args := afa.args
|
||||
if err := expectTransformArgsNum(args, 2); err != nil {
|
||||
@@ -618,7 +671,7 @@ func aggrFuncLimitK(afa *aggrFuncArg) ([]*timeseries, error) {
|
||||
}
|
||||
return tss
|
||||
}
|
||||
return aggrFuncExt(afe, args[1], &afa.ae.Modifier, true)
|
||||
return aggrFuncExt(afe, args[1], &afa.ae.Modifier, afa.ae.Limit, true)
|
||||
}
|
||||
|
||||
func aggrFuncQuantile(afa *aggrFuncArg) ([]*timeseries, error) {
|
||||
@@ -631,7 +684,7 @@ func aggrFuncQuantile(afa *aggrFuncArg) ([]*timeseries, error) {
|
||||
return nil, err
|
||||
}
|
||||
afe := newAggrQuantileFunc(phis)
|
||||
return aggrFuncExt(afe, args[1], &afa.ae.Modifier, false)
|
||||
return aggrFuncExt(afe, args[1], &afa.ae.Modifier, afa.ae.Limit, false)
|
||||
}
|
||||
|
||||
func aggrFuncMedian(afa *aggrFuncArg) ([]*timeseries, error) {
|
||||
@@ -641,30 +694,24 @@ func aggrFuncMedian(afa *aggrFuncArg) ([]*timeseries, error) {
|
||||
}
|
||||
phis := evalNumber(afa.ec, 0.5)[0].Values
|
||||
afe := newAggrQuantileFunc(phis)
|
||||
return aggrFuncExt(afe, args[0], &afa.ae.Modifier, false)
|
||||
return aggrFuncExt(afe, args[0], &afa.ae.Modifier, afa.ae.Limit, false)
|
||||
}
|
||||
|
||||
func newAggrQuantileFunc(phis []float64) func(tss []*timeseries) []*timeseries {
|
||||
return func(tss []*timeseries) []*timeseries {
|
||||
dst := tss[0]
|
||||
h := histogram.GetFast()
|
||||
defer histogram.PutFast(h)
|
||||
for n := range dst.Values {
|
||||
sort.Slice(tss, func(i, j int) bool {
|
||||
a := tss[i].Values[n]
|
||||
b := tss[j].Values[n]
|
||||
return lessWithNaNs(a, b)
|
||||
})
|
||||
h.Reset()
|
||||
for j := range tss {
|
||||
v := tss[j].Values[n]
|
||||
if !math.IsNaN(v) {
|
||||
h.Update(v)
|
||||
}
|
||||
}
|
||||
phi := phis[n]
|
||||
if math.IsNaN(phi) {
|
||||
phi = 1
|
||||
}
|
||||
if phi < 0 {
|
||||
phi = 0
|
||||
}
|
||||
if phi > 1 {
|
||||
phi = 1
|
||||
}
|
||||
idx := int(math.Round(float64(len(tss)-1) * phi))
|
||||
dst.Values[n] = tss[idx].Values[n]
|
||||
dst.Values[n] = h.Quantile(phi)
|
||||
}
|
||||
tss[0] = dst
|
||||
return tss[:1]
|
||||
|
||||
@@ -48,6 +48,11 @@ var incrementalAggrFuncCallbacksMap = map[string]*incrementalAggrFuncCallbacks{
|
||||
mergeAggrFunc: mergeAggrGeomean,
|
||||
finalizeAggrFunc: finalizeAggrGeomean,
|
||||
},
|
||||
"any": {
|
||||
updateAggrFunc: updateAggrAny,
|
||||
mergeAggrFunc: mergeAggrAny,
|
||||
finalizeAggrFunc: finalizeAggrCommon,
|
||||
},
|
||||
}
|
||||
|
||||
type incrementalAggrFuncContext struct {
|
||||
@@ -81,6 +86,10 @@ func (iafc *incrementalAggrFuncContext) updateTimeseries(ts *timeseries, workerI
|
||||
bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
|
||||
iac := m[string(bb.B)]
|
||||
if iac == nil {
|
||||
if iafc.ae.Limit > 0 && len(m) >= iafc.ae.Limit {
|
||||
// Skip this time series, since the limit on the number of output time series has been already reached.
|
||||
return
|
||||
}
|
||||
tsAggr := ×eries{
|
||||
Values: make([]float64, len(ts.Values)),
|
||||
Timestamps: ts.Timestamps,
|
||||
@@ -106,6 +115,10 @@ func (iafc *incrementalAggrFuncContext) finalizeTimeseries() []*timeseries {
|
||||
for k, iac := range m {
|
||||
iacGlobal := mGlobal[k]
|
||||
if iacGlobal == nil {
|
||||
if iafc.ae.Limit > 0 && len(mGlobal) >= iafc.ae.Limit {
|
||||
// Skip this time series, since the limit on the number of output time series has been already reached.
|
||||
continue
|
||||
}
|
||||
mGlobal[k] = iac
|
||||
continue
|
||||
}
|
||||
@@ -450,3 +463,25 @@ func finalizeAggrGeomean(iac *incrementalAggrContext) {
|
||||
dstValues[i] = math.Pow(dstValues[i], 1/v)
|
||||
}
|
||||
}
|
||||
|
||||
func updateAggrAny(iac *incrementalAggrContext, values []float64) {
|
||||
dstCounts := iac.values
|
||||
if dstCounts[0] > 0 {
|
||||
return
|
||||
}
|
||||
for i := range values {
|
||||
dstCounts[i] = 1
|
||||
}
|
||||
iac.ts.Values = append(iac.ts.Values[:0], values...)
|
||||
}
|
||||
|
||||
func mergeAggrAny(dst, src *incrementalAggrContext) {
|
||||
srcValues := src.ts.Values
|
||||
srcCounts := src.values
|
||||
dstCounts := dst.values
|
||||
if dstCounts[0] > 0 {
|
||||
return
|
||||
}
|
||||
dstCounts[0] = srcCounts[0]
|
||||
dst.ts.Values = append(dst.ts.Values[:0], srcValues...)
|
||||
}
|
||||
|
||||
@@ -663,12 +663,17 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc,
|
||||
pointsPerTimeseries := 1 + (ec.End-ec.Start)/ec.Step
|
||||
timeseriesLen := rssLen
|
||||
if iafc != nil {
|
||||
// Incremental aggregates require hold only GOMAXPROCS timeseries in memory.
|
||||
// Incremental aggregates require holding only GOMAXPROCS timeseries in memory.
|
||||
timeseriesLen = runtime.GOMAXPROCS(-1)
|
||||
if iafc.ae.Modifier.Op != "" {
|
||||
// Increase the number of timeseries for non-empty group list: `aggr() by (something)`,
|
||||
// since each group can have own set of time series in memory.
|
||||
timeseriesLen *= 1000
|
||||
if iafc.ae.Limit > 0 {
|
||||
// There is an explicit limit on the number of output time series.
|
||||
timeseriesLen *= iafc.ae.Limit
|
||||
} else {
|
||||
// Increase the number of timeseries for non-empty group list: `aggr() by (something)`,
|
||||
// since each group can have own set of time series in memory.
|
||||
timeseriesLen *= 1000
|
||||
}
|
||||
}
|
||||
// The maximum number of output time series is limited by rssLen.
|
||||
if timeseriesLen > rssLen {
|
||||
|
||||
@@ -3496,6 +3496,21 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{r1, r2}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`sum(multi-vector) by (known-tag) limit 1`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sum(label_set(10, "foo", "bar") or label_set(time()/100, "baz", "sss")) by (foo) limit 1`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{10, 10, 10, 10, 10, 10},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r.MetricName.Tags = []storage.Tag{{
|
||||
Key: []byte("foo"),
|
||||
Value: []byte("bar"),
|
||||
}}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`sum(multi-vector) by (known-tags)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sum(label_set(10, "foo", "bar", "baz", "sss", "x", "y") or label_set(time()/100, "baz", "sss", "foo", "bar")) by (foo, baz, foo)`
|
||||
@@ -3562,7 +3577,7 @@ func TestExecSuccess(t *testing.T) {
|
||||
q := `sort(histogram_over_time(alias(label_set(rand(0)*1.3+1.1, "foo", "bar"), "xxx")[200s:5s]))`
|
||||
r1 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{14, 15, 12, 13, 15, 11},
|
||||
Values: []float64{14, 16, 12, 13, 15, 11},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r1.MetricName.Tags = []storage.Tag{
|
||||
@@ -3592,7 +3607,7 @@ func TestExecSuccess(t *testing.T) {
|
||||
}
|
||||
r3 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{13, 11, 16, 19, 13, 16},
|
||||
Values: []float64{13, 10, 16, 19, 13, 16},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r3.MetricName.Tags = []storage.Tag{
|
||||
@@ -3613,7 +3628,7 @@ func TestExecSuccess(t *testing.T) {
|
||||
q := `sort(sum(histogram_over_time(alias(label_set(rand(0)*1.3+1.1, "foo", "bar"), "xxx")[200s:5s])) by (vmrange))`
|
||||
r1 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{14, 15, 12, 13, 15, 11},
|
||||
Values: []float64{14, 16, 12, 13, 15, 11},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r1.MetricName.Tags = []storage.Tag{
|
||||
@@ -3635,7 +3650,7 @@ func TestExecSuccess(t *testing.T) {
|
||||
}
|
||||
r3 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{13, 11, 16, 19, 13, 16},
|
||||
Values: []float64{13, 10, 16, 19, 13, 16},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r3.MetricName.Tags = []storage.Tag{
|
||||
@@ -3663,7 +3678,7 @@ func TestExecSuccess(t *testing.T) {
|
||||
q := `topk_max(1, histogram_over_time(alias(label_set(rand(0)*1.3+1.1, "foo", "bar"), "xxx")[200s:5s]))`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{13, 11, 16, 19, 13, 16},
|
||||
Values: []float64{13, 10, 16, 19, 13, 16},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r.MetricName.Tags = []storage.Tag{
|
||||
@@ -3768,6 +3783,17 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{r1, r2}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`any()`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `any(label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"))`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{10, 10, 10, 10, 10, 10},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`topk(-1)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sort(topk(-1, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss")))`
|
||||
@@ -4170,14 +4196,63 @@ func TestExecSuccess(t *testing.T) {
|
||||
t.Run(`quantile(NaN)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `quantile(NaN, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"))`
|
||||
resultExpected := []netstorage.Result{}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`outliersk(0)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `outliersk(0, (
|
||||
label_set(1300, "foo", "bar"),
|
||||
label_set(time(), "baz", "sss"),
|
||||
))`
|
||||
resultExpected := []netstorage.Result{}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`outliersk(1)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `outliersk(1, (
|
||||
label_set(2000, "foo", "bar"),
|
||||
label_set(time(), "baz", "sss"),
|
||||
))`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{10, 10, 10, 10.666666666666666, 12, 13.333333333333334},
|
||||
Values: []float64{1000, 1200, 1400, 1600, 1800, 2000},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r.MetricName.Tags = []storage.Tag{{
|
||||
Key: []byte("baz"),
|
||||
Value: []byte("sss"),
|
||||
}}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`outliersk(3)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sort_desc(outliersk(3, (
|
||||
label_set(1300, "foo", "bar"),
|
||||
label_set(time(), "baz", "sss"),
|
||||
)))`
|
||||
r1 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{1000, 1200, 1400, 1600, 1800, 2000},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r1.MetricName.Tags = []storage.Tag{{
|
||||
Key: []byte("baz"),
|
||||
Value: []byte("sss"),
|
||||
}}
|
||||
r2 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{1300, 1300, 1300, 1300, 1300, 1300},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r2.MetricName.Tags = []storage.Tag{{
|
||||
Key: []byte("foo"),
|
||||
Value: []byte("bar"),
|
||||
}}
|
||||
resultExpected := []netstorage.Result{r1, r2}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`range_quantile(0.5)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `range_quantile(0.5, time())`
|
||||
@@ -5505,6 +5580,8 @@ func TestExecError(t *testing.T) {
|
||||
f(`hoeffding_bound_upper()`)
|
||||
f(`hoeffding_bound_upper(1)`)
|
||||
f(`hoeffding_bound_upper(0.99, foo, 1)`)
|
||||
f(`outliersk()`)
|
||||
f(`outliersk(1)`)
|
||||
|
||||
// Invalid argument type
|
||||
f(`median_over_time({}, 2)`)
|
||||
@@ -5544,6 +5621,7 @@ func TestExecError(t *testing.T) {
|
||||
f(`alias(1, 2)`)
|
||||
f(`aggr_over_time(1, 2)`)
|
||||
f(`aggr_over_time(("foo", "bar"), 3)`)
|
||||
f(`outliersk((label_set(1, "foo", "bar"), label_set(2, "x", "y")), 123)`)
|
||||
|
||||
// Duplicate timeseries
|
||||
f(`(label_set(1, "foo", "bar") or label_set(2, "foo", "baz"))
|
||||
|
||||
@@ -72,6 +72,8 @@ var rollupFuncs = map[string]newRollupFunc{
|
||||
"aggr_over_time": newRollupFuncTwoArgs(rollupFake),
|
||||
"hoeffding_bound_upper": newRollupHoeffdingBoundUpper,
|
||||
"hoeffding_bound_lower": newRollupHoeffdingBoundLower,
|
||||
"ascent_over_time": newRollupFuncOneArg(rollupAscentOverTime),
|
||||
"descent_over_time": newRollupFuncOneArg(rollupDescentOverTime),
|
||||
|
||||
// `timestamp` function must return timestamp for the last datapoint on the current window
|
||||
// in order to properly handle offset and timestamps unaligned to the current step.
|
||||
@@ -116,6 +118,9 @@ var rollupAggrFuncs = map[string]rollupFunc{
|
||||
"scrape_interval": rollupScrapeInterval,
|
||||
"tmin_over_time": rollupTmin,
|
||||
"tmax_over_time": rollupTmax,
|
||||
"ascent_over_time": rollupAscentOverTime,
|
||||
"descent_over_time": rollupDescentOverTime,
|
||||
"timestamp": rollupTimestamp,
|
||||
}
|
||||
|
||||
var rollupFuncsCannotAdjustWindow = map[string]bool{
|
||||
@@ -138,6 +143,8 @@ var rollupFuncsCannotAdjustWindow = map[string]bool{
|
||||
"increases_over_time": true,
|
||||
"decreases_over_time": true,
|
||||
"integrate": true,
|
||||
"ascent_over_time": true,
|
||||
"descent_over_time": true,
|
||||
}
|
||||
|
||||
var rollupFuncsRemoveCounterResets = map[string]bool{
|
||||
@@ -1527,6 +1534,52 @@ func rollupTimestamp(rfa *rollupFuncArg) float64 {
|
||||
return float64(timestamps[len(timestamps)-1]) / 1e3
|
||||
}
|
||||
|
||||
func rollupAscentOverTime(rfa *rollupFuncArg) float64 {
|
||||
// There is no need in handling NaNs here, since they must be cleaned up
|
||||
// before calling rollup funcs.
|
||||
values := rfa.values
|
||||
prevValue := rfa.prevValue
|
||||
if math.IsNaN(prevValue) {
|
||||
if len(values) == 0 {
|
||||
return nan
|
||||
}
|
||||
prevValue = values[0]
|
||||
values = values[1:]
|
||||
}
|
||||
s := float64(0)
|
||||
for _, v := range values {
|
||||
d := v - prevValue
|
||||
if d > 0 {
|
||||
s += d
|
||||
}
|
||||
prevValue = v
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func rollupDescentOverTime(rfa *rollupFuncArg) float64 {
|
||||
// There is no need in handling NaNs here, since they must be cleaned up
|
||||
// before calling rollup funcs.
|
||||
values := rfa.values
|
||||
prevValue := rfa.prevValue
|
||||
if math.IsNaN(prevValue) {
|
||||
if len(values) == 0 {
|
||||
return nan
|
||||
}
|
||||
prevValue = values[0]
|
||||
values = values[1:]
|
||||
}
|
||||
s := float64(0)
|
||||
for _, v := range values {
|
||||
d := prevValue - v
|
||||
if d > 0 {
|
||||
s += d
|
||||
}
|
||||
prevValue = v
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func rollupFirst(rfa *rollupFuncArg) float64 {
|
||||
// There is no need in handling NaNs here, since they must be cleaned up
|
||||
// before calling rollup funcs.
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
|
||||
@@ -64,18 +65,18 @@ func InitRollupResultCache(cachePath string) {
|
||||
|
||||
stats := &fastcache.Stats{}
|
||||
var statsLock sync.Mutex
|
||||
var statsLastUpdate time.Time
|
||||
var statsLastUpdate uint64
|
||||
fcs := func() *fastcache.Stats {
|
||||
statsLock.Lock()
|
||||
defer statsLock.Unlock()
|
||||
|
||||
if time.Since(statsLastUpdate) < time.Second {
|
||||
if fasttime.UnixTimestamp()-statsLastUpdate < 2 {
|
||||
return stats
|
||||
}
|
||||
var fcs fastcache.Stats
|
||||
c.UpdateStats(&fcs)
|
||||
stats = &fcs
|
||||
statsLastUpdate = time.Now()
|
||||
statsLastUpdate = fasttime.UnixTimestamp()
|
||||
return stats
|
||||
}
|
||||
if len(rollupResultCachePath) > 0 {
|
||||
|
||||
@@ -389,6 +389,9 @@ func TestRollupNewRollupFuncSuccess(t *testing.T) {
|
||||
f("ideriv", 0)
|
||||
f("decreases_over_time", 5)
|
||||
f("increases_over_time", 5)
|
||||
f("ascent_over_time", 142)
|
||||
f("descent_over_time", 231)
|
||||
f("timestamp", 0.13)
|
||||
}
|
||||
|
||||
func TestRollupNewRollupFuncError(t *testing.T) {
|
||||
|
||||
@@ -409,6 +409,16 @@ func registerStorageMetrics() {
|
||||
return float64(m().AddRowsConcurrencyCurrent)
|
||||
})
|
||||
|
||||
metrics.NewGauge(`vm_slow_row_inserts_total`, func() float64 {
|
||||
return float64(m().SlowRowInserts)
|
||||
})
|
||||
metrics.NewGauge(`vm_slow_per_day_index_inserts_total`, func() float64 {
|
||||
return float64(m().SlowPerDayIndexInserts)
|
||||
})
|
||||
metrics.NewGauge(`vm_slow_metric_name_loads_total`, func() float64 {
|
||||
return float64(m().SlowMetricNameLoads)
|
||||
})
|
||||
|
||||
metrics.NewGauge(`vm_rows{type="storage/big"}`, func() float64 {
|
||||
return float64(tm().BigRowsCount)
|
||||
})
|
||||
@@ -452,6 +462,9 @@ func registerStorageMetrics() {
|
||||
metrics.NewGauge(`vm_cache_entries{type="storage/hour_metric_ids"}`, func() float64 {
|
||||
return float64(m().HourMetricIDCacheSize)
|
||||
})
|
||||
metrics.NewGauge(`vm_cache_entries{type="storage/next_day_metric_ids"}`, func() float64 {
|
||||
return float64(m().NextDayMetricIDCacheSize)
|
||||
})
|
||||
metrics.NewGauge(`vm_cache_entries{type="storage/bigIndexBlocks"}`, func() float64 {
|
||||
return float64(tm().BigIndexBlocksCacheSize)
|
||||
})
|
||||
@@ -492,6 +505,9 @@ func registerStorageMetrics() {
|
||||
metrics.NewGauge(`vm_cache_size_bytes{type="storage/hour_metric_ids"}`, func() float64 {
|
||||
return float64(m().HourMetricIDCacheSizeBytes)
|
||||
})
|
||||
metrics.NewGauge(`vm_cache_size_bytes{type="storage/next_day_metric_ids"}`, func() float64 {
|
||||
return float64(m().NextDayMetricIDCacheSizeBytes)
|
||||
})
|
||||
metrics.NewGauge(`vm_cache_size_bytes{type="indexdb/tagFilters"}`, func() float64 {
|
||||
return float64(idbm().TagCacheSizeBytes)
|
||||
})
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -4,7 +4,7 @@ DOCKER_NAMESPACE := victoriametrics
|
||||
|
||||
ROOT_IMAGE ?= scratch
|
||||
CERTS_IMAGE := alpine:3.11
|
||||
GO_BUILDER_IMAGE := golang:1.14.2
|
||||
GO_BUILDER_IMAGE := golang:1.14.3
|
||||
BUILDER_IMAGE := local/builder:2.0.0-$(shell echo $(GO_BUILDER_IMAGE) | tr : _)
|
||||
BASE_IMAGE := local/base:1.1.1-$(shell echo $(ROOT_IMAGE) | tr : _)-$(shell echo $(CERTS_IMAGE) | tr : _)
|
||||
|
||||
@@ -32,7 +32,9 @@ app-via-docker: package-base package-builder
|
||||
--env GO111MODULE=on \
|
||||
$(DOCKER_OPTS) \
|
||||
$(BUILDER_IMAGE) \
|
||||
go build $(RACE) -mod=vendor -trimpath -ldflags "-s -w -extldflags '-static' $(GO_BUILDINFO)" -tags 'netgo osusergo' \
|
||||
go build $(RACE) -mod=vendor -trimpath \
|
||||
-ldflags "-extldflags '-static' $(GO_BUILDINFO)" \
|
||||
-tags 'netgo osusergo nethttpomithttp2' \
|
||||
-o bin/$(APP_NAME)$(APP_SUFFIX)-prod $(PKG_PREFIX)/app/$(APP_NAME)
|
||||
|
||||
package-via-docker:
|
||||
|
||||
@@ -30,6 +30,8 @@
|
||||
|
||||
* [Better Prometheus rate() function with VictoriaMetrics](https://www.percona.com/blog/2020/02/28/better-prometheus-rate-function-with-victoriametrics/)
|
||||
* [Infrastructure monitoring with Prometheus at Zerodha](https://zerodha.tech/blog/infra-monitoring-at-zerodha/)
|
||||
* [CMS monitoring R&D: Real-time monitoring and alerts](https://indico.cern.ch/event/877333/contributions/3696707/attachments/1972189/3281133/CMS_mon_RD_for_opInt.pdf)
|
||||
* [Disk usage: VictoriaMetrics vs Prometheus](https://stas.starikevich.com/posts/disk-usage-for-vm-versus-prometheus/)
|
||||
* [Benchmarking time series workloads on Apache Kudu using TSBS](https://blog.cloudera.com/benchmarking-time-series-workloads-on-apache-kudu-using-tsbs/)
|
||||
* [What are Open Source Time Series Databases?](https://www.iunera.com/kraken/fabric/time-series-database/)
|
||||
* [Evaluating performance and correctness](https://www.robustperception.io/evaluating-performance-and-correctness)
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
Below are approved public case studies and talks from VictoriaMetrics users. Join our [community Slack channel](http://slack.victoriametrics.com/)
|
||||
and feel free asking for references, reviews and additional case studies from real VictoriaMetrics users there.
|
||||
|
||||
|
||||
## Adidas
|
||||
|
||||
See [slides](https://promcon.io/2019-munich/slides/remote-write-storage-wars.pdf) and [video](https://youtu.be/OsH6gPdxR4s)
|
||||
@@ -10,6 +11,20 @@ from [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-wri
|
||||
VictoriaMetrics is compared to Thanos, Corex and M3DB in the talk.
|
||||
|
||||
|
||||
## CERN
|
||||
|
||||
The European Organization for Nuclear Research known as [CERN](https://home.cern/) uses VictoriaMetrics for real-time monitoring
|
||||
of the [CMS](https://home.cern/science/experiments/cms) detector system.
|
||||
According to [published talk](https://indico.cern.ch/event/877333/contributions/3696707/attachments/1972189/3281133/CMS_mon_RD_for_opInt.pdf)
|
||||
VictoriaMetrics is used for the following purposes as a part of "CMS Monitoring cluster":
|
||||
|
||||
* As long-term storage for messages consumed from the [NATS messaging system](https://nats.io/). Consumed messages are pushed directly to VictoriaMetrics via HTTP protocol
|
||||
* As long-term storage for Prometheus monitoring system (30 days retention policy, there are plans to increase it up to ½ year)
|
||||
* As a data source for visualizing metrics in Grafana.
|
||||
|
||||
R&D topic: Evaluate VictoraMetrics vs InfluxDB for large cardinality data.
|
||||
|
||||
|
||||
## COLOPL
|
||||
|
||||
[COLOPL](http://www.colopl.co.jp/en/) is Japaneese Game Development company. It started using VictoriaMetrics
|
||||
|
||||
@@ -167,21 +167,23 @@ or [an alternative dashboard for VictoriaMetrics cluster](https://grafana.com/gr
|
||||
- `<accountID>` is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant). It is possible to set it as `accountID:projectID`,
|
||||
where `projectID` is also arbitrary 32-bit integer. If `projectID` isn't set, then it equals to `0`.
|
||||
- `<suffix>` may have the following values:
|
||||
- `prometheus` - for inserting data with [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
|
||||
- `influx/write` or `influx/api/v2/write` - for inserting data with [Influx line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)
|
||||
- `prometheus` and `prometheus/api/v1/write` - for inserting data with [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
|
||||
- `influx/write` and `influx/api/v2/write` - for inserting data with [Influx line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/).
|
||||
- `opentsdb/api/put` - for accepting [OpenTSDB HTTP /api/put requests](http://opentsdb.net/docs/build/html/api_http/put.html).
|
||||
- `prometheus/api/v1/import` - for importing data obtained via `api/v1/export` on `vmselect` (see below).
|
||||
- `prometheus/api/v1/import/csv` - for importing arbitrary CSV data. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-import-csv-data) for details.
|
||||
|
||||
* URLs for querying: `http://<vmselect>:8481/select/<accountID>/prometheus/<suffix>`, where:
|
||||
- `<accountID>` is an arbitrary number identifying data namespace for the query (aka tenant)
|
||||
- `<suffix>` may have the following values:
|
||||
- `api/v1/query` - performs [PromQL instant query](https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries)
|
||||
- `api/v1/query_range` - performs [PromQL range query](https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries)
|
||||
- `api/v1/series` - performs [series query](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers)
|
||||
- `api/v1/labels` - returns a [list of label names](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names)
|
||||
- `api/v1/label/<label_name>/values` - returns values for the given `<label_name>` according [to API](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values)
|
||||
- `federate` - returns [federated metrics](https://prometheus.io/docs/prometheus/latest/federation/)
|
||||
- `api/v1/export` - exports raw data. See [this article](https://medium.com/@valyala/analyzing-prometheus-data-with-external-tools-5f3e5e147639) for details
|
||||
- `api/v1/query` - performs [PromQL instant query](https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries).
|
||||
- `api/v1/query_range` - performs [PromQL range query](https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries).
|
||||
- `api/v1/series` - performs [series query](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers).
|
||||
- `api/v1/labels` - returns a [list of label names](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names).
|
||||
- `api/v1/label/<label_name>/values` - returns values for the given `<label_name>` according [to API](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values).
|
||||
- `federate` - returns [federated metrics](https://prometheus.io/docs/prometheus/latest/federation/).
|
||||
- `api/v1/export` - exports raw data. See [this article](https://medium.com/@valyala/analyzing-prometheus-data-with-external-tools-5f3e5e147639) for details.
|
||||
- `api/v1/status/tsdb` - for time series stats. See [these docs](https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats) for details.
|
||||
|
||||
* URL for time series deletion: `http://<vmselect>:8481/delete/<accountID>/prometheus/api/v1/admin/tsdb/delete_series?match[]=<timeseries_selector_for_delete>`.
|
||||
Note that the `delete_series` handler should be used only in exceptional cases such as deletion of accidentally ingested incorrect time series. It shouldn't
|
||||
|
||||
@@ -4,6 +4,8 @@ VictoriaMetrics implements MetricsQL - query language inspired by [PromQL](https
|
||||
It is backwards compatible with PromQL, so Grafana dashboards backed by Prometheus datasource should work the same after switching from Prometheus to VictoriaMetrics.
|
||||
[Standalone MetricsQL package](https://godoc.org/github.com/VictoriaMetrics/metricsql) can be used for parsing MetricsQL in external apps.
|
||||
|
||||
If you are unfamiliar with PromQL, then it is suggested reading [this tutorial for beginners](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085).
|
||||
|
||||
The following functionality is implemented differently in MetricsQL comparing to PromQL in order to improve user experience:
|
||||
* MetricsQL takes into account the previous point before the window in square brackets for range functions such as `rate` and `increase`.
|
||||
It also doesn't extrapolate range function results. This addresses [this issue from Prometheus](https://github.com/prometheus/prometheus/issues/3746).
|
||||
@@ -22,6 +24,8 @@ Feel free [filing a feature request](https://github.com/VictoriaMetrics/Victoria
|
||||
This functionality can be tried at [an editable Grafana dashboard](http://play-grafana.victoriametrics.com:3000/d/4ome8yJmz/node-exporter-on-victoriametrics-demo).
|
||||
|
||||
- [`WITH` templates](https://play.victoriametrics.com/promql/expand-with-exprs). This feature simplifies writing and managing complex queries. Go to [`WITH` templates playground](https://victoriametrics.com/promql/expand-with-exprs) and try it.
|
||||
- All the aggregate functions support optional `limit N` suffix in order to limit the number of output series. For example, `sum(x) by (y) limit 10` limits
|
||||
the number of output time series after the aggregation to 10. All the other time series are dropped.
|
||||
- Metric names and metric labels may contain escaped chars. For instance, `foo\-bar{baz\=aa="b"}` is valid expression. It returns time series with name `foo-bar` containing label `baz=aa` with value `b`. Additionally, `\xXX` escape sequence is supported, where `XX` is hexadecimal representation of escaped char.
|
||||
- `offset`, range duration and step value for range vector may refer to the current step aka `$__interval` value from Grafana.
|
||||
For instance, `rate(metric[10i] offset 5i)` would return per-second rate over a range covering 10 previous steps with the offset of 5 steps.
|
||||
@@ -72,6 +76,8 @@ This functionality can be tried at [an editable Grafana dashboard](http://play-g
|
||||
- `median_over_time(m[d])` - calculates median values for `m` over `d` time window. Shorthand to `quantile_over_time(0.5, m[d])`.
|
||||
- `median(q)` - median aggregate. Shorthand to `quantile(0.5, q)`.
|
||||
- `limitk(k, q)` - limits the number of time series returned from `q` to `k`.
|
||||
- `any(q) by (x)` - returns any time series from `q` for each group in `x`. Note that `any()` removes all the labels except of those listed in `by (x)`.
|
||||
Use `limitk(1, q)` if you need retaining all the labels from `q`.
|
||||
- `keep_last_value(q)` - fills missing data (gaps) in `q` with the previous non-empty value.
|
||||
- `keep_next_value(q)` - fills missing data (gaps) in `q` with the next non-empty value.
|
||||
- `distinct_over_time(m[d])` - returns distinct number of values for `m` data points over `d` duration.
|
||||
@@ -112,3 +118,7 @@ This functionality can be tried at [an editable Grafana dashboard](http://play-g
|
||||
for the given `phi` in the range `[0..1]`.
|
||||
- `last_over_time(m[d])` - returns the last value for `m` on the time range `d`.
|
||||
- `first_over_time(m[d])` - returns the first value for `m` on the time range `d`.
|
||||
- `outliersk(N, m)` - returns up to `N` outlier time series for `m`. Outlier time series have the highest deviation from the `median(m)`.
|
||||
This aggregate function is useful to detect anomalies across groups of similar time series.
|
||||
- `ascent_over_time(m[d])` - returns the sum of positive deltas between adjancent data points in `m` over `d`. Useful for tracking height gains in GPS track.
|
||||
- `descent_over_time(m[d])` - returns the absolute sum of negative deltas between adjancent data points in `m` over `d`. Useful for tracking height loss in GPS track.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# Quick Start
|
||||
|
||||
1. Download the latest VictoriaMetrics release from [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases),
|
||||
from [Docker hub](https://hub.docker.com/r/valyala/victoria-metrics/)
|
||||
from [Docker hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/)
|
||||
or [build it from sources](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/Single-server-VictoriaMetrics#how-to-build-from-sources).
|
||||
|
||||
2. Run the binary or Docker image with the desired command-line flags. Pass `-help` in order to see description for all the available flags
|
||||
@@ -17,8 +17,10 @@
|
||||
See [these instructions](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/43) in order to configure VictoriaMetrics as OS service.
|
||||
It is recommended setting up [VictoriaMetrics monitoring](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#monitoring).
|
||||
|
||||
3. Configure all the Prometheus instances to write data to VictoriaMetrics.
|
||||
See [these instructions](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/Single-server-VictoriaMetrics#prometheus-setup).
|
||||
3. Configure [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md) or Prometheus to write data to VictoriaMetrics.
|
||||
It is recommended to use `vmagent` instead of Prometheus, since it is more resource efficient. If you still prefer Prometheus, then
|
||||
see [these instructions](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/Single-server-VictoriaMetrics#prometheus-setup)
|
||||
for details on how to configure Prometheus.
|
||||
|
||||
4. Configure Grafana to query VictoriaMetrics instead of Prometheus.
|
||||
See [these instructions](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/Single-server-VictoriaMetrics#grafana-setup).
|
||||
|
||||
@@ -10,16 +10,21 @@
|
||||
|
||||
## VictoriaMetrics
|
||||
|
||||
VictoriaMetrics is fast, cost-effective and scalable time-series database. It can be used as long-term remote storage for Prometheus.
|
||||
VictoriaMetrics is fast, cost-effective and scalable time-series database.
|
||||
|
||||
It is available in [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases),
|
||||
[docker images](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and
|
||||
in [source code](https://github.com/VictoriaMetrics/VictoriaMetrics). Just download VictoriaMetrics and see [how to start it](#how-to-start-victoriametrics).
|
||||
|
||||
Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
|
||||
|
||||
[Contact us](mailto:info@victoriametrics.com) if you need paid enterprise support for VictoriaMetrics.
|
||||
|
||||
|
||||
## Case studies and talks
|
||||
|
||||
* [Adidas](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#adidas)
|
||||
* [CERN](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#cern)
|
||||
* [COLOPL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#colopl)
|
||||
* [Wix.com](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#wixcom)
|
||||
* [Wedos.com](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#wedoscom)
|
||||
@@ -33,6 +38,8 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
|
||||
|
||||
## Prominent features
|
||||
|
||||
* VictoriaMetrics can be used as long-term storage for Prometheus or for [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md).
|
||||
See [these docs](#prometheus-setup) for details.
|
||||
* Supports [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/), so it can be used as Prometheus drop-in replacement in Grafana.
|
||||
VictoriaMetrics implements [MetricsQL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/MetricsQL) query language, which is inspired by PromQL.
|
||||
* Supports global query view. Multiple Prometheus instances may write data into VictoriaMetrics. Later this data may be used in a single query.
|
||||
@@ -115,6 +122,8 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
|
||||
* [Monitoring](#monitoring)
|
||||
* [Troubleshooting](#troubleshooting)
|
||||
* [Backfilling](#backfilling)
|
||||
* [Replication](#replication)
|
||||
* [Backups](#backups)
|
||||
* [Profiling](#profiling)
|
||||
* [Integrations](#integrations)
|
||||
* [Third-party contributions](#third-party-contributions)
|
||||
@@ -139,6 +148,8 @@ The following command-line flags are used the most:
|
||||
* `-retentionPeriod` - retention period in months for the data. Older data is automatically deleted. Default period is 1 month.
|
||||
* `-httpListenAddr` - TCP address to listen to for http requests. By default, it listens port `8428` on all the network interfaces.
|
||||
|
||||
Other flags have good enough default values, so set them only if you really need this.
|
||||
|
||||
Pass `-help` to see all the available flags with description and default values.
|
||||
|
||||
It is recommended setting up [monitoring](#monitoring) for VictoriaMetrics.
|
||||
@@ -779,6 +790,8 @@ remote_write:
|
||||
kill -HUP `pidof prometheus`
|
||||
```
|
||||
|
||||
It is recommended to use [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md) instead of Prometheus for highly loaded setups.
|
||||
|
||||
4) Now Prometheus should write data into all the configured `remote_write` urls in parallel.
|
||||
5) Set up [Promxy](https://github.com/jacksontj/promxy) in front of all the VictoriaMetrics replicas.
|
||||
6) Set up Prometheus datasource in Grafana that points to Promxy.
|
||||
@@ -789,6 +802,7 @@ to write data to `victoriametrics-addr-1`, while each `r2` should write data to
|
||||
Another option is to write data simultaneously from Prometheus HA pair to a pair of VictoriaMetrics instances
|
||||
with the enabled de-duplication. See [this section](#deduplication) for details.
|
||||
|
||||
|
||||
### Deduplication
|
||||
|
||||
VictoriaMetrics de-duplicates data points if `-dedup.minScrapeInterval` command-line flag
|
||||
@@ -890,7 +904,8 @@ mkfs.ext4 ... -O 64bit,huge_file,extent -T huge
|
||||
### Monitoring
|
||||
|
||||
VictoriaMetrics exports internal metrics in Prometheus format at `/metrics` page.
|
||||
These metrics may be collected via Prometheus by adding the corresponding scrape config to it.
|
||||
These metrics may be collected by [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md)
|
||||
or Prometheus by adding the corresponding scrape config to it.
|
||||
Alternatively they can be self-scraped by setting `-selfScrapeInterval` command-line flag to duration greater than 0.
|
||||
For example, `-selfScrapeInterval=10s` would enable self-scraping of `/metrics` page with 10 seconds interval.
|
||||
|
||||
@@ -901,13 +916,17 @@ The most interesting metrics are:
|
||||
|
||||
* `vm_cache_entries{type="storage/hour_metric_ids"}` - the number of time series with new data points during the last hour
|
||||
aka active time series.
|
||||
* `rate(vm_new_timeseries_created_total[5m])` - time series churn rate.
|
||||
* `vm_rows{type="indexdb"}` - the number of rows in inverted index. High value for this number usually mean high churn rate for time series.
|
||||
* Sum of `vm_rows{type="storage/big"}` and `vm_rows{type="storage/small"}` - total number of `(timestamp, value)` data points
|
||||
in the database.
|
||||
* `vm_rows_inserted_total` - the total number of inserted rows since VictoriaMetrics start.
|
||||
* `increase(vm_new_timeseries_created_total[1h])` - time series churn rate during the previous hour.
|
||||
* `sum(vm_rows{type=~"storage/.*"})` - total number of `(timestamp, value)` data points in the database.
|
||||
* `sum(rate(vm_rows_inserted_total[5m]))` - ingestion rate, i.e. how many samples are inserted int the database per second.
|
||||
* `vm_free_disk_space_bytes` - free space left at `-storageDataPath`.
|
||||
* `sum(vm_data_size_bytes)` - the total data size on disk.
|
||||
* `sum(vm_data_size_bytes)` - the total size of data on disk.
|
||||
* `increase(vm_slow_row_inserts_total[5m])` - the number of slow inserts during the last 5 minutes.
|
||||
If this number remains high during extended periods of time, then it is likely more RAM is needed for optimal handling
|
||||
of the current number of active time series.
|
||||
* `increase(vm_slow_metric_name_loads_total[5m])` - the number of slow loads of metric names during the last 5 minutes.
|
||||
If this number remains high during extended periods of time, then it is likely more RAM is needed for optimal handling
|
||||
of the current number of active time series.
|
||||
|
||||
|
||||
### Troubleshooting
|
||||
@@ -920,8 +939,9 @@ The most interesting metrics are:
|
||||
|
||||
* If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second,
|
||||
then it is likely you have too many active time series for the current amount of RAM.
|
||||
VictoriaMetrics [exposes](#monitoring) `vm_slow_*` metrics, which could be used as an indicator of low amounts of RAM.
|
||||
It is recommended increasing the amount of RAM on the node with VictoriaMetrics in order to improve
|
||||
ingestion performance.
|
||||
ingestion and query performance in this case.
|
||||
Another option is to increase `-memory.allowedPercent` command-line flag value. Be careful with this
|
||||
option, since too big value for `-memory.allowedPercent` may result in high I/O usage.
|
||||
|
||||
@@ -968,6 +988,24 @@ the query cache, which could contain incomplete data cached during the backfilli
|
||||
Yet another solution is to increase `-search.cacheTimestampOffset` flag value in order to disable caching
|
||||
for data with timestamps close to the current time.
|
||||
|
||||
|
||||
### Replication
|
||||
|
||||
VictoriaMetrics relies on replicated durable persistent storage such as [Google Cloud disks](https://cloud.google.com/compute/docs/disks#pdspecs)
|
||||
or [Amazon EBS](https://aws.amazon.com/ebs/). It is also recommended making periodic backups,
|
||||
since [replication doesn't save from disaster](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883).
|
||||
See [backup docs](#backups) for details.
|
||||
|
||||
See also [high availability docs](#high-availability) and [docs about cluster version of VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md).
|
||||
|
||||
|
||||
### Backups
|
||||
|
||||
VictoriaMetrics supports backups via [vmbackup](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmbackup/README.md)
|
||||
and [vmrestore](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmrestore/README.md) tools.
|
||||
We also provide provide `vmbackuper` tool for paid enterprise subscribers - see [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/466) for details.
|
||||
|
||||
|
||||
### Profiling
|
||||
|
||||
VictoriaMetrics provides handlers for collecting the following [Go profiles](https://blog.golang.org/profiling-go-programs):
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
## vmagent
|
||||
|
||||
`vmagent` is a tiny but brave agent, which helps you collecting metrics from various sources
|
||||
and storing them to [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
|
||||
or any other Prometheus-compatible storage system that supports `remote_write` protocol.
|
||||
`vmagent` is a tiny but brave agent, which helps you collect metrics from various sources
|
||||
and stores them in [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
|
||||
or any other Prometheus-compatible storage system that supports the `remote_write` protocol.
|
||||
|
||||
<img alt="vmagent" src="vmagent.png">
|
||||
|
||||
@@ -11,7 +11,7 @@ or any other Prometheus-compatible storage system that supports `remote_write` p
|
||||
|
||||
While VictoriaMetrics provides an efficient solution to store and observe metrics, our users needed something fast
|
||||
and RAM friendly to scrape metrics from Prometheus-compatible exporters to VictoriaMetrics.
|
||||
Also, we found that users’ infrastructure is like snowflakes - never alike, and we decided to add more flexibility
|
||||
Also, we found that users’ infrastructure are snowflakes - no two are alike, and we decided to add more flexibility
|
||||
to `vmagent` (like the ability to push metrics instead of pulling them). We did our best and plan to do even more.
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ to `vmagent` (like the ability to push metrics instead of pulling them). We did
|
||||
* Works in environments with unstable connections to remote storage. If the remote storage is unavailable, the collected metrics
|
||||
are buffered at `-remoteWrite.tmpDataPath`. The buffered metrics are sent to remote storage as soon as connection
|
||||
to remote storage is recovered. The maximum disk usage for the buffer can be limited with `-remoteWrite.maxDiskUsagePerURL`.
|
||||
* Uses lower amounts of RAM, CPU, disk IO and network bandwidth comparing to Prometheus.
|
||||
* Uses lower amounts of RAM, CPU, disk IO and network bandwidth compared to Prometheus.
|
||||
|
||||
|
||||
### Quick Start
|
||||
@@ -40,8 +40,7 @@ Just download `vmutils-*` archive from [releases page](https://github.com/Victor
|
||||
and pass the following flags to `vmagent` binary in order to start scraping Prometheus targets:
|
||||
|
||||
* `-promscrape.config` with the path to Prometheus config file (it is usually located at `/etc/prometheus/prometheus.yml`)
|
||||
* `-remoteWrite.url` with the remote storage endpoint such as VictoriaMetrics. Multiple `-remoteWrite.url` args can be set in parallel
|
||||
in order to replicate data concurrently to multiple remote storage systems.
|
||||
* `-remoteWrite.url` with the remote storage endpoint such as VictoriaMetrics. The `-remoteWrite.url` argument can be specified multiple times in order to replicate data concurrently to an arbitrary amount of remote storage systems.
|
||||
|
||||
Example command line:
|
||||
|
||||
@@ -49,7 +48,7 @@ Example command line:
|
||||
/path/to/vmagent -promscrape.config=/path/to/prometheus.yml -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
|
||||
```
|
||||
|
||||
If you need collecting only Influx data, then the following command line would be enough:
|
||||
If you only need to collect Influx data, then the following is sufficient:
|
||||
|
||||
```
|
||||
/path/to/vmagent -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
|
||||
@@ -79,14 +78,14 @@ See [the corresponding Makefile rules](https://github.com/VictoriaMetrics/Victor
|
||||
#### Drop-in replacement for Prometheus
|
||||
|
||||
If you use Prometheus only for scraping metrics from various targets and forwarding these metrics to remote storage,
|
||||
then `vmagent` can replace such Prometheus setup. Usually `vmagent` requires lower amounts of RAM, CPU and network bandwidth comparing to Prometheus for such setup.
|
||||
then `vmagent` can replace such Prometheus setup. Usually `vmagent` requires lower amounts of RAM, CPU and network bandwidth comparing to Prometheus for such a setup.
|
||||
See [these docs](#how-to-collect-metrics-in-prometheus-format) for details.
|
||||
|
||||
|
||||
#### Replication and high availability
|
||||
|
||||
`vmagent` replicates the collected metrics among multiple remote storage instances configured via `-remoteWrite.url` args.
|
||||
If a single remote storage instance temporarily goes out of service, then the collected data remains available in another remote storage instances.
|
||||
If a single remote storage instance temporarily is out of service, then the collected data remains available in another remote storage instances.
|
||||
`vmagent` buffers the collected data in files at `-remoteWrite.tmpDataPath` until the remote storage becomes available again.
|
||||
Then it sends the buffered data to the remote storage in order to prevent data gaps in the remote storage.
|
||||
|
||||
@@ -94,13 +93,13 @@ Then it sends the buffered data to the remote storage in order to prevent data g
|
||||
#### Relabeling and filtering
|
||||
|
||||
`vmagent` can add, remove or update labels on the collected data before sending it to remote storage. Additionally,
|
||||
it can remove unneeded samples via Prometheus-like relabeling before sending the collected data to remote storage.
|
||||
it can remove unwanted samples via Prometheus-like relabeling before sending the collected data to remote storage.
|
||||
See [these docs](#relabeling) for details.
|
||||
|
||||
|
||||
#### Splitting data streams among multiple systems
|
||||
|
||||
`vmagent` supports splitting of the collected data among muliple destinations with the help of `-remoteWrite.urlRelabelConfig`,
|
||||
`vmagent` supports splitting the collected data between muliple destinations with the help of `-remoteWrite.urlRelabelConfig`,
|
||||
which is applied independently for each configured `-remoteWrite.url` destination. For instance, it is possible to replicate or split
|
||||
data among long-term remote storage, short-term remote storage and real-time analytical system [built on top of Kafka](https://github.com/Telefonica/prometheus-kafka-adapter).
|
||||
Note that each destination can receive its own subset of the collected data thanks to per-destination relabeling via `-remoteWrite.urlRelabelConfig`.
|
||||
@@ -148,6 +147,10 @@ The following scrape types in [scrape_config](https://prometheus.io/docs/prometh
|
||||
* `dns_sd_configs` - for scraping targets discovered from DNS records (SRV, A and AAAA).
|
||||
See [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config) for details.
|
||||
|
||||
Note that `vmagent` doesn't support `refresh_interval` option these scrape configs. Use the corresponding `-promscrape.*CheckInterval`
|
||||
command-line flag instead. For example, `-promscrape.consulSDCheckInterval=60s` sets `refresh_interval` for all the `consul_sd_configs`
|
||||
entries to 60s. Run `vmagent -help` in order to see default values for `-promscrape.*CheckInterval` flags.
|
||||
|
||||
|
||||
File feature requests at [our issue tracker](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you need other service discovery mechanisms to be supported by `vmagent`.
|
||||
|
||||
@@ -200,12 +203,12 @@ either via `vmagent` itself or via Prometheus, so the exported metrics could be
|
||||
* When `vmagent` scrapes many unreliable targets, it can flood error log with scrape errors. These errors can be suppressed
|
||||
by passing `-promscrape.suppressScrapeErrors` command-line flag to `vmagent`. The most recent scrape error per each target can be observed at `http://vmagent-host:8429/targets`.
|
||||
|
||||
* It is recommended increasing `-remoteWrite.queues` if `vmagent` collects more than 100K samples per second
|
||||
* It is recommended to increase `-remoteWrite.queues` if `vmagent` collects more than 100K samples per second
|
||||
and `vmagent_remotewrite_pending_data_bytes` metric exported at `http://vmagent-host:8429/metrics` page constantly grows.
|
||||
|
||||
* `vmagent` buffers scraped data at `-remoteWrite.tmpDataPath` directory until it is sent to `-remoteWrite.url`.
|
||||
The directory can grow big when remote storage is unavailable during extended periods of time and if `-remoteWrite.maxDiskUsagePerURL` isn't set.
|
||||
If you don't want sending all the data from the directory to remote storage, just stop `vmagent` and delete the directory.
|
||||
The directory can grow large when remote storage is unavailable for extended periods of time and if `-remoteWrite.maxDiskUsagePerURL` isn't set.
|
||||
If you don't want to send all the data from the directory to remote storage, simply stop `vmagent` and delete the directory.
|
||||
|
||||
|
||||
### How to build from sources
|
||||
|
||||
@@ -13,7 +13,7 @@ sends alerts to [Alert Manager](https://github.com/prometheus/alertmanager).
|
||||
* Lightweight without extra dependencies.
|
||||
|
||||
### TODO:
|
||||
* Configuration hot reload.
|
||||
* Support recording rules.
|
||||
|
||||
### QuickStart
|
||||
|
||||
@@ -48,10 +48,11 @@ Rules in group evaluated one-by-one sequentially.
|
||||
* `http://<vmalert-addr>/api/v1/<groupName>/<alertID>/status" ` - get alert status by ID.
|
||||
Used as alert source in AlertManager.
|
||||
* `http://<vmalert-addr>/metrics` - application metrics.
|
||||
* `http://<vmalert-addr>/-/reload` - hot configuration reload.
|
||||
|
||||
`vmalert` may be configured with `-remotewrite` flag to write alerts state in form of timeseries
|
||||
`vmalert` may be configured with `-remoteWrite` flag to write alerts state in form of timeseries
|
||||
via remote write protocol. Alerts state will be written as `ALERTS` timeseries. These timeseries
|
||||
may be used to recover alerts state on `vmalert` restarts if `-remoteread` is configured.
|
||||
may be used to recover alerts state on `vmalert` restarts if `-remoteRead` is configured.
|
||||
|
||||
|
||||
### Configuration
|
||||
@@ -81,19 +82,21 @@ Usage of vmalert:
|
||||
Address to listen for http connections (default ":8880")
|
||||
-notifier.url string
|
||||
Prometheus alertmanager URL. Required parameter. e.g. http://127.0.0.1:9093
|
||||
-remoteread.basicAuth.password string
|
||||
Optional basic auth password for -remoteread.url
|
||||
-remoteread.basicAuth.username string
|
||||
Optional basic auth username for -remoteread.url
|
||||
-remoteread.lookback duration
|
||||
-remoteRead.basicAuth.password string
|
||||
Optional basic auth password for -remoteRead.url
|
||||
-remoteRead.basicAuth.username string
|
||||
Optional basic auth username for -remoteRead.url
|
||||
-remoteRead.lookback duration
|
||||
Lookback defines how far to look into past for alerts timeseries. For example, if lookback=1h then range from now() to now()-1h will be scanned. (default 1h0m0s)
|
||||
-remoteread.url vmalert
|
||||
Optional URL to Victoria Metrics or VMSelect that will be used to restore alerts state. This configuration makes sense only if vmalert was configured with `remotewrite.url` before and has been successfully persisted its state. E.g. http://127.0.0.1:8428
|
||||
-remotewrite.basicAuth.password string
|
||||
Optional basic auth password for -remotewrite.url
|
||||
-remotewrite.basicAuth.username string
|
||||
Optional basic auth username for -remotewrite.url
|
||||
-remotewrite.url string
|
||||
-remoteRead.url vmalert
|
||||
Optional URL to Victoria Metrics or VMSelect that will be used to restore alerts state. This configuration makes sense only if vmalert was configured with `remoteWrite.url` before and has been successfully persisted its state. E.g. http://127.0.0.1:8428
|
||||
-remoteWrite.basicAuth.password string
|
||||
Optional basic auth password for -remoteWrite.url
|
||||
-remoteWrite.basicAuth.username string
|
||||
Optional basic auth username for -remoteWrite.url
|
||||
-remoteWrite.maxQueueSize
|
||||
Defines the max number of pending datapoints to remote write endpoint
|
||||
-remoteWrite.url string
|
||||
Optional URL to Victoria Metrics or VMInsert where to persist alerts state in form of timeseries. E.g. http://127.0.0.1:8428
|
||||
-rule value
|
||||
Path to the file with alert rules.
|
||||
@@ -109,8 +112,30 @@ Usage of vmalert:
|
||||
Pass `-help` to `vmalert` in order to see the full list of supported
|
||||
command-line flags with their descriptions.
|
||||
|
||||
To reload configuration without `vmalert` restart send SIGHUP signal
|
||||
or send GET request to `/-/reload` endpoint.
|
||||
|
||||
### Contributing
|
||||
|
||||
`vmalert` is mostly designed and built by VictoriaMetrics community.
|
||||
Feel free to share your experience and ideas for improving this
|
||||
software. Please keep simplicity as the main priority.
|
||||
|
||||
### How to build from sources
|
||||
|
||||
It is recommended using
|
||||
[binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
||||
- `vmalert` is located in `vmutils-*` archives there.
|
||||
|
||||
|
||||
#### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
|
||||
2. Run `make vmalert` from the root folder of the repository.
|
||||
It builds `vmalert` binary and puts it into the `bin` folder.
|
||||
|
||||
#### Production build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
2. Run `make vmalert-prod` from the root folder of the repository.
|
||||
It builds `vmalert-prod` binary and puts it into the `bin` folder.
|
||||
|
||||
@@ -19,7 +19,7 @@ The port can be modified via `-httpListenAddr` command-line flag.
|
||||
|
||||
The auth config can be reloaded by passing `SIGHUP` signal to `vmauth`.
|
||||
|
||||
Docker images for `vmauth` are available at [https://hub.docker.com/r/victoriametrics/vmauth/tags].
|
||||
Docker images for `vmauth` are available [here](https://hub.docker.com/r/victoriametrics/vmauth/tags).
|
||||
|
||||
Pass `-help` to `vmauth` in order to see all the supported command-line flags with their descriptions.
|
||||
|
||||
|
||||
@@ -19,6 +19,9 @@ Backed up data can be restored with [vmrestore](https://github.com/VictoriaMetri
|
||||
|
||||
See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
|
||||
|
||||
See also [vmbackuper](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/466) tool built on top of `vmbackup`. This tool simplifies
|
||||
creation of hourly, daily, weekly and monthly backups.
|
||||
|
||||
|
||||
### Use cases
|
||||
|
||||
|
||||
@@ -21,7 +21,8 @@ vmrestore -src=gcs://<bucket>/<path/to/backup> -storageDataPath=<local/path/to/r
|
||||
* `<local/path/to/restore>` is the path to folder where data will be restored. This folder must be passed
|
||||
to VictoriaMetrics in `-storageDataPath` command-line flag after the restore process is complete.
|
||||
|
||||
The original `-storageDataPath` directory may contain old files. They will be susbstituted by the files from backup.
|
||||
The original `-storageDataPath` directory may contain old files. They will be susbstituted by the files from backup,
|
||||
i.e. the end result would be similar to [rsync --delete](https://askubuntu.com/questions/476041/how-do-i-make-rsync-delete-files-that-have-been-deleted-from-the-source-folder).
|
||||
|
||||
|
||||
### Troubleshooting
|
||||
@@ -52,7 +53,7 @@ Run `vmrestore -help` in order to see all the available options:
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files
|
||||
Whether to use pread() instead of mmap() for reading data files. By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot data files bigger than 2^32 bytes in memory
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerLevel string
|
||||
@@ -68,8 +69,8 @@ Run `vmrestore -help` in order to see all the available options:
|
||||
-src string
|
||||
Source path with backup on the remote storage. Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
|
||||
-storageDataPath string
|
||||
Destination path where backup must be restored. VictoriaMetrics must be stopped when restoring from backup. -storageDataPath dir can be non-empty. In this case only missing data is downloaded from backup (default "victoria-metrics-data")
|
||||
-version
|
||||
Destination path where backup must be restored. VictoriaMetrics must be stopped when restoring from backup. -storageDataPath dir can be non-empty. In this case the contents of -storageDataPath dir is synchronized with -src contents, i.e. it works like 'rsync --delete' (default "victoria-metrics-data")
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
```
|
||||
|
||||
|
||||
26
go.mod
26
go.mod
@@ -1,17 +1,17 @@
|
||||
module github.com/VictoriaMetrics/VictoriaMetrics
|
||||
|
||||
require (
|
||||
cloud.google.com/go/storage v1.6.0
|
||||
cloud.google.com/go/storage v1.8.0
|
||||
github.com/VictoriaMetrics/fastcache v1.5.7
|
||||
|
||||
// Do not use the original github.com/valyala/fasthttp because of issues
|
||||
// like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b
|
||||
github.com/VictoriaMetrics/fasthttp v1.0.1
|
||||
github.com/VictoriaMetrics/metrics v1.11.2
|
||||
github.com/VictoriaMetrics/metricsql v0.1.0
|
||||
github.com/aws/aws-sdk-go v1.30.20
|
||||
github.com/VictoriaMetrics/metrics v1.11.3
|
||||
github.com/VictoriaMetrics/metricsql v0.2.3
|
||||
github.com/aws/aws-sdk-go v1.30.28
|
||||
github.com/cespare/xxhash/v2 v2.1.1
|
||||
github.com/golang/protobuf v1.4.1 // indirect
|
||||
github.com/golang/protobuf v1.4.2 // indirect
|
||||
github.com/golang/snappy v0.0.1
|
||||
github.com/klauspost/compress v1.10.5
|
||||
github.com/valyala/fastjson v1.5.1
|
||||
@@ -19,15 +19,15 @@ require (
|
||||
github.com/valyala/gozstd v1.7.0
|
||||
github.com/valyala/histogram v1.0.1
|
||||
github.com/valyala/quicktemplate v1.5.0
|
||||
golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5 // indirect
|
||||
golang.org/x/mod v0.3.0 // indirect
|
||||
golang.org/x/net v0.0.0-20200513185701-a91f0712d120 // indirect
|
||||
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d
|
||||
golang.org/x/sys v0.0.0-20200501145240-bc7a7d42d5c3
|
||||
golang.org/x/tools v0.0.0-20200504193531-9bfbc385433f // indirect
|
||||
google.golang.org/api v0.23.0
|
||||
google.golang.org/appengine v1.6.6 // indirect
|
||||
google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84 // indirect
|
||||
google.golang.org/grpc v1.29.1 // indirect
|
||||
gopkg.in/yaml.v2 v2.2.8
|
||||
golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9
|
||||
golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d // indirect
|
||||
google.golang.org/api v0.24.0
|
||||
google.golang.org/genproto v0.0.0-20200514193133-8feb7f20f2a2 // indirect
|
||||
gopkg.in/yaml.v2 v2.3.0
|
||||
honnef.co/go/tools v0.0.1-2020.1.4 // indirect
|
||||
)
|
||||
|
||||
go 1.13
|
||||
|
||||
70
go.sum
70
go.sum
@@ -8,12 +8,18 @@ cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg
|
||||
cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To=
|
||||
cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4=
|
||||
cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M=
|
||||
cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc=
|
||||
cloud.google.com/go v0.56.0 h1:WRz29PgAsVEyPSDHyk+0fpEkwEFyfhHn+JbksT6gIL4=
|
||||
cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk=
|
||||
cloud.google.com/go v0.57.0 h1:EpMNVUorLiZIELdMZbCYX/ByTFCdoYopYAGxaGVz9ms=
|
||||
cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs=
|
||||
cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o=
|
||||
cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE=
|
||||
cloud.google.com/go/bigquery v1.4.0 h1:xE3CPsOgttP4ACBePh79zTKALtXwn/Edhcr16R5hMWU=
|
||||
cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc=
|
||||
cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg=
|
||||
cloud.google.com/go/bigquery v1.7.0 h1:a/O/bK/vWrYGOTFtH8di4rBxMZnmkjy+Y5LxpDwo+dA=
|
||||
cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc=
|
||||
cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE=
|
||||
cloud.google.com/go/datastore v1.1.0 h1:/May9ojXjRkPBNVrq+oWLqmWCkr4OU5uRY29bu0mRyQ=
|
||||
cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk=
|
||||
@@ -21,10 +27,14 @@ cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2k
|
||||
cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw=
|
||||
cloud.google.com/go/pubsub v1.2.0 h1:Lpy6hKgdcl7a3WGSfJIFmxmcdjSpP6OmBEfcOv1Y680=
|
||||
cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA=
|
||||
cloud.google.com/go/pubsub v1.3.1 h1:ukjixP1wl0LpnZ6LWtZJ0mX5tBmjp1f8Sqer8Z2OMUU=
|
||||
cloud.google.com/go/pubsub v1.3.1/go.mod h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU=
|
||||
cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw=
|
||||
cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos=
|
||||
cloud.google.com/go/storage v1.6.0 h1:UDpwYIwla4jHGzZJaEJYx1tOejbgSoNqsAfHAUYe2r8=
|
||||
cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk=
|
||||
cloud.google.com/go/storage v1.8.0 h1:86K1Gel7BQ9/WmNWn7dTKMvTLFzwtBe5FNqYbi9X35g=
|
||||
cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs=
|
||||
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
|
||||
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
|
||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
@@ -35,12 +45,14 @@ github.com/VictoriaMetrics/fasthttp v1.0.1 h1:I7YdbswTIW63WxoFoUOSNxeOEGB46rdKUL
|
||||
github.com/VictoriaMetrics/fasthttp v1.0.1/go.mod h1:BqgsieH90PR7x97c89j+eqZDloKkDhAEQTwhLw6jw/4=
|
||||
github.com/VictoriaMetrics/metrics v1.11.2 h1:t/ceLP6SvagUqypCKU7cI7+tQn54+TIV/tGoxihHvx8=
|
||||
github.com/VictoriaMetrics/metrics v1.11.2/go.mod h1:LU2j9qq7xqZYXz8tF3/RQnB2z2MbZms5TDiIg9/NHiQ=
|
||||
github.com/VictoriaMetrics/metricsql v0.1.0 h1:IoyG84PCwFY15rNuxpr2nQ+YZBYIhnd7zTiaGo5BNpc=
|
||||
github.com/VictoriaMetrics/metricsql v0.1.0/go.mod h1:UIjd9S0W1UnTWlJdM0wLS+2pfuPqjwqKoK8yTos+WyE=
|
||||
github.com/VictoriaMetrics/metrics v1.11.3 h1:eSfXc0CrquKa1VTNUvhP+dhNjLUZHQGTFfp19mYCQWE=
|
||||
github.com/VictoriaMetrics/metrics v1.11.3/go.mod h1:LU2j9qq7xqZYXz8tF3/RQnB2z2MbZms5TDiIg9/NHiQ=
|
||||
github.com/VictoriaMetrics/metricsql v0.2.3 h1:xGscDmLoeIV7+8qX/mdHnOY0vu4m+wHIVGMoy/nBovY=
|
||||
github.com/VictoriaMetrics/metricsql v0.2.3/go.mod h1:UIjd9S0W1UnTWlJdM0wLS+2pfuPqjwqKoK8yTos+WyE=
|
||||
github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156 h1:eMwmnE/GDgah4HI848JfFxHt+iPb26b4zyfspmqY0/8=
|
||||
github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156/go.mod h1:Cb/ax3seSYIx7SuZdm2G2xzfwmv3TPSk2ucNfQESPXM=
|
||||
github.com/aws/aws-sdk-go v1.30.20 h1:ktsy2vodSZxz/arYqo7DlpkIeNohHL+4Rmjdo7YGtrE=
|
||||
github.com/aws/aws-sdk-go v1.30.20/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0=
|
||||
github.com/aws/aws-sdk-go v1.30.28 h1:SaPM7dlmp7h3Lj1nJ4jdzOkTdom08+g20k7AU5heZYg=
|
||||
github.com/aws/aws-sdk-go v1.30.28/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0=
|
||||
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
||||
github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY=
|
||||
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
@@ -69,11 +81,13 @@ github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfb
|
||||
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
|
||||
github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y=
|
||||
github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
|
||||
github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
|
||||
github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
|
||||
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
|
||||
github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
|
||||
github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk=
|
||||
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
|
||||
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
|
||||
@@ -83,6 +97,8 @@ github.com/golang/protobuf v1.4.0 h1:oOuy+ugB+P/kBdUnG5QaMXSIyJ1q38wWSojYCb3z5VQ
|
||||
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
|
||||
github.com/golang/protobuf v1.4.1 h1:ZFgWrT+bLgsYPirOnRfKLYJLvssAegOj/hgyMFdJZe0=
|
||||
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
|
||||
github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0=
|
||||
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
|
||||
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
|
||||
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
|
||||
@@ -99,6 +115,7 @@ github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OI
|
||||
github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
|
||||
github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
|
||||
github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
|
||||
github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
|
||||
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
|
||||
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
|
||||
github.com/googleapis/gax-go/v2 v2.0.5 h1:sjZBwGj9Jlw33ImPtvFviGYvseOtDM7hkSKB7+Tv3SM=
|
||||
@@ -186,6 +203,8 @@ golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzB
|
||||
golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
|
||||
golang.org/x/mod v0.2.0 h1:KU7oHjnv3XNWfa5COkzUifxZmxp1TyI7ImMXqFxLwvQ=
|
||||
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.3.0 h1:RM4zey1++hCTbCVQfnWeKs9/IEsaBLA8vTkd0WVtmH4=
|
||||
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
@@ -202,9 +221,14 @@ golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLL
|
||||
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||
golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5 h1:WQ8q63x+f/zpC8Ac1s9wLElVoHhm32p6tudrU72n1QA=
|
||||
golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||
golang.org/x/net v0.0.0-20200506145744-7e3656a0809f h1:QBjCr1Fz5kw158VqdE9JfI9cJnl/ymnJWAdMuinqL7Y=
|
||||
golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||
golang.org/x/net v0.0.0-20200513185701-a91f0712d120 h1:EZ3cVSzKOlJxAd8e8YAJ7no8nNypTxexh/YE/xW3ZEY=
|
||||
golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
||||
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
|
||||
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
|
||||
@@ -236,10 +260,14 @@ golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7w
|
||||
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200501145240-bc7a7d42d5c3 h1:5B6i6EAiSYyejWfvc5Rc9BbI3rzIsrrXfAQBWnYfn+w=
|
||||
golang.org/x/sys v0.0.0-20200501145240-bc7a7d42d5c3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25 h1:OKbAoGs4fGM5cPLlVQLZGYkFC8OnOfgo6tt0Smf9XhM=
|
||||
golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9 h1:YTzHMGlqJu67/uEo1lBv0n3wBXhXNeUbB1XfN2vmTm0=
|
||||
golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
@@ -277,9 +305,14 @@ golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapK
|
||||
golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
||||
golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
||||
golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
||||
golang.org/x/tools v0.0.0-20200227222343-706bc42d1f0d/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
||||
golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw=
|
||||
golang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw=
|
||||
golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8=
|
||||
golang.org/x/tools v0.0.0-20200504193531-9bfbc385433f h1:sPpKxOcxUWPeIIqQbn06sX/NBtDl45ZGdi67lZECBsw=
|
||||
golang.org/x/tools v0.0.0-20200504193531-9bfbc385433f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
|
||||
golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
|
||||
golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
|
||||
golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d h1:n6zwymXmN9rCClNNmCWwV3qwMmBcRw/WeIGDK8Qnzk4=
|
||||
golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
|
||||
@@ -293,9 +326,11 @@ google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsb
|
||||
google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
|
||||
google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
|
||||
google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
|
||||
google.golang.org/api v0.19.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
|
||||
google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
|
||||
google.golang.org/api v0.23.0 h1:YlvGEOq2NA2my8cZ/9V8BcEO9okD48FlJcdqN0xJL3s=
|
||||
google.golang.org/api v0.23.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE=
|
||||
google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
|
||||
google.golang.org/api v0.24.0 h1:cG03eaksBzhfSIk7JRGctfp3lanklcOM/mTGvow7BbQ=
|
||||
google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE=
|
||||
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
|
||||
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
|
||||
google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
|
||||
@@ -320,9 +355,16 @@ google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvx
|
||||
google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA=
|
||||
google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
|
||||
google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
|
||||
google.golang.org/genproto v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
|
||||
google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
|
||||
google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
|
||||
google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
|
||||
google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84 h1:pSLkPbrjnPyLDYUO2VM9mDLqo2V6CFBY84lFSZAfoi4=
|
||||
google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
|
||||
google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380 h1:xriR1EgvKfkKxIoU2uUvrMVl+H26359loFFUleSMXFo=
|
||||
google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
|
||||
google.golang.org/genproto v0.0.0-20200514193133-8feb7f20f2a2 h1:RwW6+LxyOQJ7oeoZ76GIJlwt/O0J5cN2fk+q/jK27kQ=
|
||||
google.golang.org/genproto v0.0.0-20200514193133-8feb7f20f2a2/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U=
|
||||
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
|
||||
google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
|
||||
google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
|
||||
@@ -342,13 +384,15 @@ google.golang.org/protobuf v1.21.0 h1:qdOKuR/EIArgaWNjetjgTzgVTAZ+S/WXVrq9HW9zim
|
||||
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
|
||||
google.golang.org/protobuf v1.22.0 h1:cJv5/xdbk1NnMPR1VP9+HU6gupuG9MLBoH1r6RHZ2MY=
|
||||
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
||||
google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM=
|
||||
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
|
||||
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU=
|
||||
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
||||
honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
||||
honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
||||
@@ -356,6 +400,8 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh
|
||||
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
|
||||
honnef.co/go/tools v0.0.1-2020.1.3 h1:sXmLre5bzIR6ypkjXCDI3jHPssRhc8KD/Ome589sc3U=
|
||||
honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
|
||||
honnef.co/go/tools v0.0.1-2020.1.4 h1:UoveltGrhghAA7ePc+e+QYDHXrBps2PqFZiHkGR/xK8=
|
||||
honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
|
||||
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
|
||||
rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
|
||||
rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=
|
||||
|
||||
@@ -113,10 +113,16 @@ func (r *Restore) Run() error {
|
||||
partsToDelete := common.PartsDifference(dstParts, srcParts)
|
||||
deleteSize := uint64(0)
|
||||
if len(partsToDelete) > 0 {
|
||||
// Fully remove local file if certain parts from the remote part are missing.
|
||||
// Remove only files with the missing part at offset 0.
|
||||
// Assume other files are partially downloaded during the previous Restore.Run call,
|
||||
// so only the last part in them may be incomplete.
|
||||
// The last part for partially downloaded files will be re-downloaded later.
|
||||
// This addresses https://github.com/VictoriaMetrics/VictoriaMetrics/issues/487 .
|
||||
pathsToDelete := make(map[string]bool)
|
||||
for _, p := range partsToDelete {
|
||||
pathsToDelete[p.Path] = true
|
||||
if p.Offset == 0 {
|
||||
pathsToDelete[p.Path] = true
|
||||
}
|
||||
}
|
||||
logger.Infof("deleting %d files from %s", len(pathsToDelete), dst)
|
||||
for path := range pathsToDelete {
|
||||
@@ -153,7 +159,8 @@ func (r *Restore) Run() error {
|
||||
logger.Infof("downloading %d parts from %s to %s", len(partsToCopy), src, dst)
|
||||
bytesDownloaded := uint64(0)
|
||||
err = runParallelPerPath(concurrency, perPath, func(parts []common.Part) error {
|
||||
// Sort partsToCopy in order to properly grow file size during downloading.
|
||||
// Sort partsToCopy in order to properly grow file size during downloading
|
||||
// and to properly resume downloading of incomplete files on the next Restore.Run call.
|
||||
common.SortParts(parts)
|
||||
for _, p := range parts {
|
||||
logger.Infof("downloading %s from %s to %s", &p, src, dst)
|
||||
@@ -169,7 +176,7 @@ func (r *Restore) Run() error {
|
||||
return fmt.Errorf("cannot download %s to %s: %s", &p, dst, err)
|
||||
}
|
||||
if err := wc.Close(); err != nil {
|
||||
return fmt.Errorf("cannot close reader fro %s from %s: %s", &p, src, err)
|
||||
return fmt.Errorf("cannot close reader from %s from %s: %s", &p, src, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
||||
@@ -230,6 +230,6 @@ func NewRemoteFS(path string) (common.RemoteFS, error) {
|
||||
}
|
||||
return fs, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported scheme %q in `-dst`", scheme)
|
||||
return nil, fmt.Errorf("unsupported scheme %q", scheme)
|
||||
}
|
||||
}
|
||||
|
||||
40
lib/fasttime/fasttime.go
Normal file
40
lib/fasttime/fasttime.go
Normal file
@@ -0,0 +1,40 @@
|
||||
package fasttime
|
||||
|
||||
import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
)
|
||||
|
||||
func init() {
|
||||
go func() {
|
||||
ticker := time.NewTicker(time.Second)
|
||||
defer ticker.Stop()
|
||||
for tm := range ticker.C {
|
||||
t := uint64(tm.Unix())
|
||||
atomic.StoreUint64(¤tTimestamp, t)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
var currentTimestamp = uint64(time.Now().Unix())
|
||||
|
||||
// UnixTimestamp returns the current unix timestamp in seconds.
|
||||
//
|
||||
// It is faster than time.Now().Unix()
|
||||
func UnixTimestamp() uint64 {
|
||||
return atomic.LoadUint64(¤tTimestamp)
|
||||
}
|
||||
|
||||
// UnixDate returns date from the current unix timestamp.
|
||||
//
|
||||
// The date is calculated by dividing unix timestamp by (24*3600)
|
||||
func UnixDate() uint64 {
|
||||
return UnixTimestamp() / (24 * 3600)
|
||||
}
|
||||
|
||||
// UnixHour returns hour from the current unix timestamp.
|
||||
//
|
||||
// The hour is calculated by dividing unix timestamp by 3600
|
||||
func UnixHour() uint64 {
|
||||
return UnixTimestamp() / 3600
|
||||
}
|
||||
30
lib/fasttime/fasttime_test.go
Normal file
30
lib/fasttime/fasttime_test.go
Normal file
@@ -0,0 +1,30 @@
|
||||
package fasttime
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestUnixTimestamp(t *testing.T) {
|
||||
tsExpected := uint64(time.Now().Unix())
|
||||
ts := UnixTimestamp()
|
||||
if ts-tsExpected > 1 {
|
||||
t.Fatalf("unexpected UnixTimestamp; got %d; want %d", ts, tsExpected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnixDate(t *testing.T) {
|
||||
dateExpected := uint64(time.Now().Unix() / (24 * 3600))
|
||||
date := UnixDate()
|
||||
if date-dateExpected > 1 {
|
||||
t.Fatalf("unexpected UnixDate; got %d; want %d", date, dateExpected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnixHour(t *testing.T) {
|
||||
hourExpected := uint64(time.Now().Unix() / 3600)
|
||||
hour := UnixHour()
|
||||
if hour-hourExpected > 1 {
|
||||
t.Fatalf("unexpected UnixHour; got %d; want %d", hour, hourExpected)
|
||||
}
|
||||
}
|
||||
32
lib/fasttime/fasttime_timing_test.go
Normal file
32
lib/fasttime/fasttime_timing_test.go
Normal file
@@ -0,0 +1,32 @@
|
||||
package fasttime
|
||||
|
||||
import (
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func BenchmarkUnixTimestamp(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
var ts uint64
|
||||
for pb.Next() {
|
||||
ts += UnixTimestamp()
|
||||
}
|
||||
atomic.StoreUint64(&Sink, ts)
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkTimeNowUnix(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
var ts uint64
|
||||
for pb.Next() {
|
||||
ts += uint64(time.Now().Unix())
|
||||
}
|
||||
atomic.StoreUint64(&Sink, ts)
|
||||
})
|
||||
}
|
||||
|
||||
// Sink should prevent from code elimination by optimizing compiler
|
||||
var Sink uint64
|
||||
@@ -10,7 +10,10 @@ import (
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var disableMmap = flag.Bool("fs.disableMmap", false, "Whether to use pread() instead of mmap() for reading data files")
|
||||
var disableMmap = flag.Bool("fs.disableMmap", is32BitPtr, "Whether to use pread() instead of mmap() for reading data files. "+
|
||||
"By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot data files bigger than 2^32 bytes in memory")
|
||||
|
||||
const is32BitPtr = (^uintptr(0) >> 32) == 0
|
||||
|
||||
// MustReadAtCloser is rand-access read interface.
|
||||
type MustReadAtCloser interface {
|
||||
|
||||
@@ -148,18 +148,18 @@ func unmarshalBlockHeaders(dst []blockHeader, src []byte, blockHeadersCount int)
|
||||
for i := 0; i < blockHeadersCount; i++ {
|
||||
tail, err := dst[dstLen+i].Unmarshal(src)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot unmarshal block header: %s", err)
|
||||
return dst, fmt.Errorf("cannot unmarshal block header: %s", err)
|
||||
}
|
||||
src = tail
|
||||
}
|
||||
if len(src) > 0 {
|
||||
return nil, fmt.Errorf("unexpected non-zero tail left after unmarshaling %d block headers; len(tail)=%d", blockHeadersCount, len(src))
|
||||
return dst, fmt.Errorf("unexpected non-zero tail left after unmarshaling %d block headers; len(tail)=%d", blockHeadersCount, len(src))
|
||||
}
|
||||
newBHS := dst[dstLen:]
|
||||
|
||||
// Verify that block headers are sorted by firstItem.
|
||||
if !sort.SliceIsSorted(newBHS, func(i, j int) bool { return string(newBHS[i].firstItem) < string(newBHS[j].firstItem) }) {
|
||||
return nil, fmt.Errorf("block headers must be sorted by firstItem; unmarshaled unsorted block headers: %#v", newBHS)
|
||||
return dst, fmt.Errorf("block headers must be sorted by firstItem; unmarshaled unsorted block headers: %#v", newBHS)
|
||||
}
|
||||
|
||||
return dst, nil
|
||||
|
||||
@@ -63,9 +63,12 @@ func (bsw *blockStreamWriter) reset() {
|
||||
bsw.mrFirstItemCaught = false
|
||||
}
|
||||
|
||||
func (bsw *blockStreamWriter) InitFromInmemoryPart(ip *inmemoryPart, compressLevel int) {
|
||||
func (bsw *blockStreamWriter) InitFromInmemoryPart(ip *inmemoryPart) {
|
||||
bsw.reset()
|
||||
bsw.compressLevel = compressLevel
|
||||
|
||||
// Use the minimum compression level for in-memory blocks,
|
||||
// since they are going to be re-compressed during the merge into file-based blocks.
|
||||
bsw.compressLevel = -5 // See https://github.com/facebook/zstd/releases/tag/v1.3.4
|
||||
|
||||
bsw.metaindexWriter = &ip.metaindexData
|
||||
bsw.indexWriter = &ip.indexData
|
||||
|
||||
@@ -151,7 +151,7 @@ var isInTest = func() bool {
|
||||
return strings.HasSuffix(os.Args[0], ".test")
|
||||
}()
|
||||
|
||||
// MarshalUnsortedData marshals sorted items from ib to sb.
|
||||
// MarshalSortedData marshals sorted items from ib to sb.
|
||||
//
|
||||
// It also:
|
||||
// - appends first item to firstItemDst and returns the result.
|
||||
|
||||
@@ -29,14 +29,14 @@ func TestMultilevelMerge(t *testing.T) {
|
||||
// First level merge
|
||||
var dstIP1 inmemoryPart
|
||||
var bsw1 blockStreamWriter
|
||||
bsw1.InitFromInmemoryPart(&dstIP1, 0)
|
||||
bsw1.InitFromInmemoryPart(&dstIP1)
|
||||
if err := mergeBlockStreams(&dstIP1.ph, &bsw1, bsrs[:5], nil, nil, &itemsMerged); err != nil {
|
||||
t.Fatalf("cannot merge first level part 1: %s", err)
|
||||
}
|
||||
|
||||
var dstIP2 inmemoryPart
|
||||
var bsw2 blockStreamWriter
|
||||
bsw2.InitFromInmemoryPart(&dstIP2, 0)
|
||||
bsw2.InitFromInmemoryPart(&dstIP2)
|
||||
if err := mergeBlockStreams(&dstIP2.ph, &bsw2, bsrs[5:], nil, nil, &itemsMerged); err != nil {
|
||||
t.Fatalf("cannot merge first level part 2: %s", err)
|
||||
}
|
||||
@@ -53,7 +53,7 @@ func TestMultilevelMerge(t *testing.T) {
|
||||
newTestBlockStreamReader(&dstIP1),
|
||||
newTestBlockStreamReader(&dstIP2),
|
||||
}
|
||||
bsw.InitFromInmemoryPart(&dstIP, 0)
|
||||
bsw.InitFromInmemoryPart(&dstIP)
|
||||
if err := mergeBlockStreams(&dstIP.ph, &bsw, bsrsTop, nil, nil, &itemsMerged); err != nil {
|
||||
t.Fatalf("cannot merge second level: %s", err)
|
||||
}
|
||||
@@ -72,7 +72,7 @@ func TestMergeForciblyStop(t *testing.T) {
|
||||
bsrs, _ := newTestInmemoryBlockStreamReaders(20, 4000)
|
||||
var dstIP inmemoryPart
|
||||
var bsw blockStreamWriter
|
||||
bsw.InitFromInmemoryPart(&dstIP, 0)
|
||||
bsw.InitFromInmemoryPart(&dstIP)
|
||||
ch := make(chan struct{})
|
||||
var itemsMerged uint64
|
||||
close(ch)
|
||||
@@ -119,7 +119,7 @@ func testMergeBlockStreamsSerial(blocksToMerge, maxItemsPerBlock int) error {
|
||||
var itemsMerged uint64
|
||||
var dstIP inmemoryPart
|
||||
var bsw blockStreamWriter
|
||||
bsw.InitFromInmemoryPart(&dstIP, 0)
|
||||
bsw.InitFromInmemoryPart(&dstIP)
|
||||
if err := mergeBlockStreams(&dstIP.ph, &bsw, bsrs, nil, nil, &itemsMerged); err != nil {
|
||||
return fmt.Errorf("cannot merge block streams: %s", err)
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
@@ -227,7 +228,7 @@ func (idxbc *indexBlockCache) cleaner() {
|
||||
}
|
||||
|
||||
func (idxbc *indexBlockCache) cleanByTimeout() {
|
||||
currentTime := atomic.LoadUint64(¤tTimestamp)
|
||||
currentTime := fasttime.UnixTimestamp()
|
||||
idxbc.mu.Lock()
|
||||
for k, idxbe := range idxbc.m {
|
||||
// Delete items accessed more than 10 minutes ago.
|
||||
@@ -245,7 +246,7 @@ func (idxbc *indexBlockCache) Get(k uint64) *indexBlock {
|
||||
idxbc.mu.RUnlock()
|
||||
|
||||
if idxbe != nil {
|
||||
currentTime := atomic.LoadUint64(¤tTimestamp)
|
||||
currentTime := fasttime.UnixTimestamp()
|
||||
if atomic.LoadUint64(&idxbe.lastAccessTime) != currentTime {
|
||||
atomic.StoreUint64(&idxbe.lastAccessTime, currentTime)
|
||||
}
|
||||
@@ -256,9 +257,7 @@ func (idxbc *indexBlockCache) Get(k uint64) *indexBlock {
|
||||
}
|
||||
|
||||
// Put puts idxb under the key k into idxbc.
|
||||
//
|
||||
// Returns true if the idxb has been put into idxbc.
|
||||
func (idxbc *indexBlockCache) Put(k uint64, idxb *indexBlock) bool {
|
||||
func (idxbc *indexBlockCache) Put(k uint64, idxb *indexBlock) {
|
||||
idxbc.mu.Lock()
|
||||
|
||||
// Remove superflouos entries.
|
||||
@@ -278,12 +277,11 @@ func (idxbc *indexBlockCache) Put(k uint64, idxb *indexBlock) bool {
|
||||
|
||||
// Store idxb in the cache.
|
||||
idxbe := &indexBlockCacheEntry{
|
||||
lastAccessTime: atomic.LoadUint64(¤tTimestamp),
|
||||
lastAccessTime: fasttime.UnixTimestamp(),
|
||||
idxb: idxb,
|
||||
}
|
||||
idxbc.m[k] = idxbe
|
||||
idxbc.mu.Unlock()
|
||||
return true
|
||||
}
|
||||
|
||||
func (idxbc *indexBlockCache) Len() uint64 {
|
||||
@@ -377,7 +375,7 @@ func (ibc *inmemoryBlockCache) cleaner() {
|
||||
}
|
||||
|
||||
func (ibc *inmemoryBlockCache) cleanByTimeout() {
|
||||
currentTime := atomic.LoadUint64(¤tTimestamp)
|
||||
currentTime := fasttime.UnixTimestamp()
|
||||
ibc.mu.Lock()
|
||||
for k, ibe := range ibc.m {
|
||||
// Delete items accessed more than 10 minutes ago.
|
||||
@@ -396,7 +394,7 @@ func (ibc *inmemoryBlockCache) Get(k inmemoryBlockCacheKey) *inmemoryBlock {
|
||||
ibc.mu.RUnlock()
|
||||
|
||||
if ibe != nil {
|
||||
currentTime := atomic.LoadUint64(¤tTimestamp)
|
||||
currentTime := fasttime.UnixTimestamp()
|
||||
if atomic.LoadUint64(&ibe.lastAccessTime) != currentTime {
|
||||
atomic.StoreUint64(&ibe.lastAccessTime, currentTime)
|
||||
}
|
||||
@@ -407,9 +405,7 @@ func (ibc *inmemoryBlockCache) Get(k inmemoryBlockCacheKey) *inmemoryBlock {
|
||||
}
|
||||
|
||||
// Put puts ib under key k into ibc.
|
||||
//
|
||||
// Returns true if ib was put into ibc.
|
||||
func (ibc *inmemoryBlockCache) Put(k inmemoryBlockCacheKey, ib *inmemoryBlock) bool {
|
||||
func (ibc *inmemoryBlockCache) Put(k inmemoryBlockCacheKey, ib *inmemoryBlock) {
|
||||
ibc.mu.Lock()
|
||||
|
||||
// Clean superflouos entries in cache.
|
||||
@@ -429,12 +425,11 @@ func (ibc *inmemoryBlockCache) Put(k inmemoryBlockCacheKey, ib *inmemoryBlock) b
|
||||
|
||||
// Store ib in the cache.
|
||||
ibe := &inmemoryBlockCacheEntry{
|
||||
lastAccessTime: atomic.LoadUint64(¤tTimestamp),
|
||||
lastAccessTime: fasttime.UnixTimestamp(),
|
||||
ib: ib,
|
||||
}
|
||||
ibc.m[k] = ibe
|
||||
ibc.mu.Unlock()
|
||||
return true
|
||||
}
|
||||
|
||||
func (ibc *inmemoryBlockCache) Len() uint64 {
|
||||
@@ -451,16 +446,3 @@ func (ibc *inmemoryBlockCache) Requests() uint64 {
|
||||
func (ibc *inmemoryBlockCache) Misses() uint64 {
|
||||
return atomic.LoadUint64(&ibc.misses)
|
||||
}
|
||||
|
||||
func init() {
|
||||
go func() {
|
||||
ticker := time.NewTicker(time.Second)
|
||||
defer ticker.Stop()
|
||||
for tm := range ticker.C {
|
||||
t := uint64(tm.Unix())
|
||||
atomic.StoreUint64(¤tTimestamp, t)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
var currentTimestamp uint64
|
||||
|
||||
@@ -25,9 +25,6 @@ type partSearch struct {
|
||||
// The remaining block headers to scan in the current metaindexRow.
|
||||
bhs []blockHeader
|
||||
|
||||
// Pointer to index block, which may be reused.
|
||||
indexBlockReuse *indexBlock
|
||||
|
||||
// Pointer to inmemory block, which may be reused.
|
||||
inmemoryBlockReuse *inmemoryBlock
|
||||
|
||||
@@ -53,10 +50,6 @@ func (ps *partSearch) reset() {
|
||||
ps.p = nil
|
||||
ps.mrs = nil
|
||||
ps.bhs = nil
|
||||
if ps.indexBlockReuse != nil {
|
||||
putIndexBlock(ps.indexBlockReuse)
|
||||
ps.indexBlockReuse = nil
|
||||
}
|
||||
if ps.inmemoryBlockReuse != nil {
|
||||
putInmemoryBlock(ps.inmemoryBlockReuse)
|
||||
ps.inmemoryBlockReuse = nil
|
||||
@@ -275,40 +268,25 @@ func (ps *partSearch) nextBlock() error {
|
||||
}
|
||||
|
||||
func (ps *partSearch) nextBHS() error {
|
||||
if ps.indexBlockReuse != nil {
|
||||
putIndexBlock(ps.indexBlockReuse)
|
||||
ps.indexBlockReuse = nil
|
||||
}
|
||||
if len(ps.mrs) == 0 {
|
||||
return io.EOF
|
||||
}
|
||||
mr := &ps.mrs[0]
|
||||
ps.mrs = ps.mrs[1:]
|
||||
idxb, mayReuseIndexBlock, err := ps.getIndexBlock(mr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot get index block: %s", err)
|
||||
}
|
||||
if mayReuseIndexBlock {
|
||||
ps.indexBlockReuse = idxb
|
||||
idxbKey := mr.indexBlockOffset
|
||||
idxb := ps.idxbCache.Get(idxbKey)
|
||||
if idxb == nil {
|
||||
var err error
|
||||
idxb, err = ps.readIndexBlock(mr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot read index block: %s", err)
|
||||
}
|
||||
ps.idxbCache.Put(idxbKey, idxb)
|
||||
}
|
||||
ps.bhs = idxb.bhs
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ps *partSearch) getIndexBlock(mr *metaindexRow) (*indexBlock, bool, error) {
|
||||
idxbKey := mr.indexBlockOffset
|
||||
idxb := ps.idxbCache.Get(idxbKey)
|
||||
if idxb != nil {
|
||||
return idxb, false, nil
|
||||
}
|
||||
idxb, err := ps.readIndexBlock(mr)
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
ok := ps.idxbCache.Put(idxbKey, idxb)
|
||||
return idxb, !ok, nil
|
||||
}
|
||||
|
||||
func (ps *partSearch) readIndexBlock(mr *metaindexRow) (*indexBlock, error) {
|
||||
ps.compressedIndexBuf = bytesutil.Resize(ps.compressedIndexBuf, int(mr.indexBlockSize))
|
||||
ps.p.indexFile.MustReadAt(ps.compressedIndexBuf, int64(mr.indexBlockOffset))
|
||||
@@ -347,8 +325,8 @@ func (ps *partSearch) getInmemoryBlock(bh *blockHeader) (*inmemoryBlock, bool, e
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
ok := ps.ibCache.Put(ibKey, ib)
|
||||
return ib, !ok, nil
|
||||
ps.ibCache.Put(ibKey, ib)
|
||||
return ib, false, nil
|
||||
}
|
||||
|
||||
func (ps *partSearch) readInmemoryBlock(bh *blockHeader) (*inmemoryBlock, error) {
|
||||
|
||||
@@ -149,7 +149,7 @@ func newTestPart(blocksCount, maxItemsPerBlock int) (*part, []string, error) {
|
||||
var itemsMerged uint64
|
||||
var ip inmemoryPart
|
||||
var bsw blockStreamWriter
|
||||
bsw.InitFromInmemoryPart(&ip, 0)
|
||||
bsw.InitFromInmemoryPart(&ip)
|
||||
if err := mergeBlockStreams(&ip.ph, &bsw, bsrs, nil, nil, &itemsMerged); err != nil {
|
||||
return nil, nil, fmt.Errorf("cannot merge blocks: %s", err)
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
@@ -102,7 +103,7 @@ type Table struct {
|
||||
|
||||
rawItemsBlocks []*inmemoryBlock
|
||||
rawItemsLock sync.Mutex
|
||||
rawItemsLastFlushTime time.Time
|
||||
rawItemsLastFlushTime uint64
|
||||
|
||||
snapshotLock sync.RWMutex
|
||||
|
||||
@@ -369,7 +370,7 @@ func (tb *Table) AddItems(items [][]byte) error {
|
||||
if len(tb.rawItemsBlocks) >= 1024 {
|
||||
blocksToMerge = tb.rawItemsBlocks
|
||||
tb.rawItemsBlocks = nil
|
||||
tb.rawItemsLastFlushTime = time.Now()
|
||||
tb.rawItemsLastFlushTime = fasttime.UnixTimestamp()
|
||||
}
|
||||
tb.rawItemsLock.Unlock()
|
||||
|
||||
@@ -508,11 +509,15 @@ func (tb *Table) flushRawItems(isFinal bool) {
|
||||
defer tb.rawItemsPendingFlushesWG.Done()
|
||||
|
||||
mustFlush := false
|
||||
currentTime := time.Now()
|
||||
currentTime := fasttime.UnixTimestamp()
|
||||
flushSeconds := int64(rawItemsFlushInterval.Seconds())
|
||||
if flushSeconds <= 0 {
|
||||
flushSeconds = 1
|
||||
}
|
||||
var blocksToMerge []*inmemoryBlock
|
||||
|
||||
tb.rawItemsLock.Lock()
|
||||
if isFinal || currentTime.Sub(tb.rawItemsLastFlushTime) > rawItemsFlushInterval {
|
||||
if isFinal || currentTime-tb.rawItemsLastFlushTime > uint64(flushSeconds) {
|
||||
mustFlush = true
|
||||
blocksToMerge = tb.rawItemsBlocks
|
||||
tb.rawItemsBlocks = nil
|
||||
@@ -619,9 +624,8 @@ func (tb *Table) mergeInmemoryBlocks(blocksToMerge []*inmemoryBlock) *partWrappe
|
||||
|
||||
// Prepare blockStreamWriter for destination part.
|
||||
bsw := getBlockStreamWriter()
|
||||
compressLevel := 1
|
||||
mpDst := getInmemoryPart()
|
||||
bsw.InitFromInmemoryPart(mpDst, compressLevel)
|
||||
bsw.InitFromInmemoryPart(mpDst)
|
||||
|
||||
// Merge parts.
|
||||
// The merge shouldn't be interrupted by stopCh,
|
||||
@@ -674,7 +678,7 @@ const (
|
||||
|
||||
func (tb *Table) partMerger() error {
|
||||
sleepTime := minMergeSleepTime
|
||||
var lastMergeTime time.Time
|
||||
var lastMergeTime uint64
|
||||
isFinal := false
|
||||
t := time.NewTimer(sleepTime)
|
||||
for {
|
||||
@@ -682,7 +686,7 @@ func (tb *Table) partMerger() error {
|
||||
if err == nil {
|
||||
// Try merging additional parts.
|
||||
sleepTime = minMergeSleepTime
|
||||
lastMergeTime = time.Now()
|
||||
lastMergeTime = fasttime.UnixTimestamp()
|
||||
isFinal = false
|
||||
continue
|
||||
}
|
||||
@@ -693,10 +697,10 @@ func (tb *Table) partMerger() error {
|
||||
if err != errNothingToMerge {
|
||||
return err
|
||||
}
|
||||
if time.Since(lastMergeTime) > 30*time.Second {
|
||||
if fasttime.UnixTimestamp()-lastMergeTime > 30 {
|
||||
// We have free time for merging into bigger parts.
|
||||
// This should improve select performance.
|
||||
lastMergeTime = time.Now()
|
||||
lastMergeTime = fasttime.UnixTimestamp()
|
||||
isFinal = true
|
||||
continue
|
||||
}
|
||||
@@ -764,8 +768,10 @@ func (tb *Table) mergeParts(pws []*partWrapper, stopCh <-chan struct{}, isOuterP
|
||||
}
|
||||
|
||||
outItemsCount := uint64(0)
|
||||
outBlocksCount := uint64(0)
|
||||
for _, pw := range pws {
|
||||
outItemsCount += pw.p.ph.itemsCount
|
||||
outBlocksCount += pw.p.ph.blocksCount
|
||||
}
|
||||
nocache := true
|
||||
if outItemsCount < maxItemsPerCachedPart() {
|
||||
@@ -778,7 +784,7 @@ func (tb *Table) mergeParts(pws []*partWrapper, stopCh <-chan struct{}, isOuterP
|
||||
mergeIdx := tb.nextMergeIdx()
|
||||
tmpPartPath := fmt.Sprintf("%s/tmp/%016X", tb.path, mergeIdx)
|
||||
bsw := getBlockStreamWriter()
|
||||
compressLevel := getCompressLevelForPartItems(outItemsCount)
|
||||
compressLevel := getCompressLevelForPartItems(outItemsCount, outBlocksCount)
|
||||
if err := bsw.InitFromFilePart(tmpPartPath, nocache, compressLevel); err != nil {
|
||||
return fmt.Errorf("cannot create destination part %q: %s", tmpPartPath, err)
|
||||
}
|
||||
@@ -863,27 +869,43 @@ func (tb *Table) mergeParts(pws []*partWrapper, stopCh <-chan struct{}, isOuterP
|
||||
|
||||
d := time.Since(startTime)
|
||||
if d > 10*time.Second {
|
||||
logger.Infof("merged %d items in %.3f seconds at %d items/sec to %q; sizeBytes: %d",
|
||||
outItemsCount, d.Seconds(), int(float64(outItemsCount)/d.Seconds()), dstPartPath, newPSize)
|
||||
logger.Infof("merged %d items across %d blocks in %.3f seconds at %d items/sec to %q; sizeBytes: %d",
|
||||
outItemsCount, outBlocksCount, d.Seconds(), int(float64(outItemsCount)/d.Seconds()), dstPartPath, newPSize)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getCompressLevelForPartItems(itemsCount uint64) int {
|
||||
if itemsCount < 1<<19 {
|
||||
func getCompressLevelForPartItems(itemsCount, blocksCount uint64) int {
|
||||
// There is no need in using blocksCount here, since mergeset blocks are usually full.
|
||||
|
||||
if itemsCount <= 1<<16 {
|
||||
// -5 is the minimum supported compression for zstd.
|
||||
// See https://github.com/facebook/zstd/releases/tag/v1.3.4
|
||||
return -5
|
||||
}
|
||||
if itemsCount <= 1<<17 {
|
||||
return -4
|
||||
}
|
||||
if itemsCount <= 1<<18 {
|
||||
return -3
|
||||
}
|
||||
if itemsCount <= 1<<19 {
|
||||
return -2
|
||||
}
|
||||
if itemsCount <= 1<<20 {
|
||||
return -1
|
||||
}
|
||||
if itemsCount <= 1<<22 {
|
||||
return 1
|
||||
}
|
||||
if itemsCount < 1<<22 {
|
||||
if itemsCount <= 1<<25 {
|
||||
return 2
|
||||
}
|
||||
if itemsCount < 1<<25 {
|
||||
if itemsCount <= 1<<28 {
|
||||
return 3
|
||||
}
|
||||
if itemsCount < 1<<28 {
|
||||
return 4
|
||||
}
|
||||
return 5
|
||||
return 4
|
||||
}
|
||||
|
||||
func (tb *Table) nextMergeIdx() uint64 {
|
||||
@@ -892,15 +914,15 @@ func (tb *Table) nextMergeIdx() uint64 {
|
||||
|
||||
var (
|
||||
maxOutPartItemsLock sync.Mutex
|
||||
maxOutPartItemsDeadline time.Time
|
||||
maxOutPartItemsDeadline uint64
|
||||
lastMaxOutPartItems uint64
|
||||
)
|
||||
|
||||
func (tb *Table) maxOutPartItems() uint64 {
|
||||
maxOutPartItemsLock.Lock()
|
||||
if time.Until(maxOutPartItemsDeadline) < 0 {
|
||||
if maxOutPartItemsDeadline < fasttime.UnixTimestamp() {
|
||||
lastMaxOutPartItems = tb.maxOutPartItemsSlow()
|
||||
maxOutPartItemsDeadline = time.Now().Add(time.Second)
|
||||
maxOutPartItemsDeadline = fasttime.UnixTimestamp() + 2
|
||||
}
|
||||
n := lastMaxOutPartItems
|
||||
maxOutPartItemsLock.Unlock()
|
||||
|
||||
@@ -75,6 +75,8 @@ func (fq *FastQueue) flushInmemoryBlocksToFileLocked() {
|
||||
fq.pendingInmemoryBytes -= uint64(len(bb.B))
|
||||
blockBufPool.Put(bb)
|
||||
}
|
||||
// Unblock all the potentially blocked readers, so they could proceed with reading file-based queue.
|
||||
fq.cond.Broadcast()
|
||||
}
|
||||
|
||||
// GetPendingBytes returns the number of pending bytes in the fq.
|
||||
@@ -120,10 +122,10 @@ func (fq *FastQueue) MustWriteBlock(block []byte) {
|
||||
bb.B = append(bb.B[:0], block...)
|
||||
fq.ch <- bb
|
||||
fq.pendingInmemoryBytes += uint64(len(block))
|
||||
if len(fq.ch) == 1 {
|
||||
// Notify potentially blocked reader
|
||||
fq.cond.Signal()
|
||||
}
|
||||
|
||||
// Notify potentially blocked reader.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/484 for the context.
|
||||
fq.cond.Signal()
|
||||
}
|
||||
|
||||
// MustReadBlock reads the next block from fq to dst and returns it.
|
||||
@@ -137,7 +139,7 @@ func (fq *FastQueue) MustReadBlock(dst []byte) ([]byte, bool) {
|
||||
}
|
||||
if len(fq.ch) > 0 {
|
||||
if n := fq.pq.GetPendingBytes(); n > 0 {
|
||||
logger.Panicf("BUG: the file-based queue must be empty when the inmemory queue is empty; it contains %d pending bytes", n)
|
||||
logger.Panicf("BUG: the file-based queue must be empty when the inmemory queue is non-empty; it contains %d pending bytes", n)
|
||||
}
|
||||
bb := <-fq.ch
|
||||
fq.pendingInmemoryBytes -= uint64(len(bb.B))
|
||||
|
||||
@@ -383,13 +383,13 @@ func (q *Queue) MustWriteBlock(block []byte) {
|
||||
return
|
||||
}
|
||||
}
|
||||
mustNotifyReader := q.readerOffset == q.writerOffset
|
||||
if err := q.writeBlockLocked(block); err != nil {
|
||||
logger.Panicf("FATAL: %s", err)
|
||||
}
|
||||
if mustNotifyReader {
|
||||
q.cond.Signal()
|
||||
}
|
||||
|
||||
// Notify blocked reader if any.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/484 for details.
|
||||
q.cond.Signal()
|
||||
}
|
||||
|
||||
var blockBufPool bytesutil.ByteBufferPool
|
||||
|
||||
@@ -69,9 +69,8 @@ func (ac *Config) NewTLSConfig() *tls.Config {
|
||||
ClientSessionCache: tls.NewLRUClientSessionCache(0),
|
||||
}
|
||||
if ac.TLSCertificate != nil {
|
||||
tlsCfg.GetClientCertificate = func(*tls.CertificateRequestInfo) (*tls.Certificate, error) {
|
||||
return ac.TLSCertificate, nil
|
||||
}
|
||||
// Do not set tlsCfg.GetClientCertificate, since tlsCfg.Certificates should work OK.
|
||||
tlsCfg.Certificates = []tls.Certificate{*ac.TLSCertificate}
|
||||
}
|
||||
tlsCfg.ServerName = ac.TLSServerName
|
||||
tlsCfg.InsecureSkipVerify = ac.TLSInsecureSkipVerify
|
||||
|
||||
@@ -21,10 +21,15 @@ func MarshalWriteRequest(dst []byte, wr *WriteRequest) []byte {
|
||||
|
||||
// ResetWriteRequest resets wr.
|
||||
func ResetWriteRequest(wr *WriteRequest) {
|
||||
for i := range wr.Timeseries {
|
||||
ts := wr.Timeseries[i]
|
||||
wr.Timeseries = ResetTimeSeries(wr.Timeseries)
|
||||
}
|
||||
|
||||
// ResetTimeSeries clears all the GC references from tss and returns an empty tss ready for further use.
|
||||
func ResetTimeSeries(tss []TimeSeries) []TimeSeries {
|
||||
for i := range tss {
|
||||
ts := tss[i]
|
||||
ts.Labels = nil
|
||||
ts.Samples = nil
|
||||
}
|
||||
wr.Timeseries = wr.Timeseries[:0]
|
||||
return tss[:0]
|
||||
}
|
||||
|
||||
@@ -7,7 +7,6 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/fasthttp"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
@@ -49,7 +48,6 @@ func newClient(sw *ScrapeWork) *client {
|
||||
Addr: host,
|
||||
Name: "vm_promscrape",
|
||||
Dial: statDial,
|
||||
DialDualStack: netutil.TCP6Enabled(),
|
||||
IsTLS: isTLS,
|
||||
TLSConfig: tlsCfg,
|
||||
MaxIdleConnDuration: 2 * sw.ScrapeInterval,
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
@@ -25,6 +26,9 @@ import (
|
||||
var (
|
||||
strictParse = flag.Bool("promscrape.config.strictParse", false, "Whether to allow only supported fields in '-promscrape.config'. "+
|
||||
"This option may be used for errors detection in '-promscrape.config' file")
|
||||
dryRun = flag.Bool("promscrape.config.dryRun", false, "Checks -promscrape.config file for errors and unsupported fields and then exits. "+
|
||||
"Returns non-zero exit code on parsing errors and emits these errors to stderr. "+
|
||||
"Pass -loggerLevel=ERROR if you don't need to see info messages in the output")
|
||||
)
|
||||
|
||||
// Config represents essential parts from Prometheus config defined at https://prometheus.io/docs/prometheus/latest/configuration/configuration/
|
||||
@@ -114,6 +118,13 @@ func loadConfig(path string) (cfg *Config, data []byte, err error) {
|
||||
if err := cfgObj.parse(data, path); err != nil {
|
||||
return nil, nil, fmt.Errorf("cannot parse Prometheus config from %q: %s", path, err)
|
||||
}
|
||||
if *dryRun {
|
||||
// This is a dirty hack for checking Prometheus config only.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/362
|
||||
// and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/508 for details.
|
||||
logger.Infof("Success: the config at %q has no errors; exitting with 0 status code", path)
|
||||
os.Exit(0)
|
||||
}
|
||||
return &cfgObj, data, nil
|
||||
}
|
||||
|
||||
@@ -139,7 +150,7 @@ func (cfg *Config) parse(data []byte, path string) error {
|
||||
|
||||
func unmarshalMaybeStrict(data []byte, dst interface{}) error {
|
||||
var err error
|
||||
if *strictParse {
|
||||
if *strictParse || *dryRun {
|
||||
err = yaml.UnmarshalStrict(data, dst)
|
||||
} else {
|
||||
err = yaml.Unmarshal(data, dst)
|
||||
|
||||
@@ -50,7 +50,7 @@ func newAPIConfig(sdc *SDConfig, baseDir string) (*apiConfig, error) {
|
||||
tlsConfig := promauth.TLSConfig{
|
||||
CAFile: "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt",
|
||||
}
|
||||
acNew, err := promauth.NewConfig("/", nil, "", "/var/run/secrets/kubernetes.io/serviceaccount/token", &tlsConfig)
|
||||
acNew, err := promauth.NewConfig(".", nil, "", "/var/run/secrets/kubernetes.io/serviceaccount/token", &tlsConfig)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot initialize service account auth: %s; probably, `kubernetes_sd_config->api_server` is missing in Prometheus configs?", err)
|
||||
}
|
||||
|
||||
@@ -2,17 +2,26 @@ package discoveryutils
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"flag"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
|
||||
"github.com/VictoriaMetrics/fasthttp"
|
||||
)
|
||||
|
||||
var (
|
||||
maxConcurrency = flag.Int("promscrape.discovery.concurrency", 500, "The maximum number of concurrent requests to Prometheus autodiscovery API (Consul, Kubernetes, etc.)")
|
||||
maxWaitTime = flag.Duration("promscrape.discovery.concurrentWaitTime", time.Minute, "The maximum duration for waiting to perform API requests "+
|
||||
"if more than -promscrape.discovery.concurrency requests are simultaneously performed")
|
||||
)
|
||||
|
||||
var defaultClient = &http.Client{
|
||||
Timeout: 30 * time.Second,
|
||||
}
|
||||
@@ -56,6 +65,7 @@ func NewClient(apiServer string, ac *promauth.Config) (*Client, error) {
|
||||
ReadTimeout: time.Minute,
|
||||
WriteTimeout: 10 * time.Second,
|
||||
MaxResponseBodySize: 300 * 1024 * 1024,
|
||||
MaxConns: *maxConcurrency,
|
||||
}
|
||||
return &Client{
|
||||
hc: hc,
|
||||
@@ -65,8 +75,30 @@ func NewClient(apiServer string, ac *promauth.Config) (*Client, error) {
|
||||
}, nil
|
||||
}
|
||||
|
||||
var (
|
||||
concurrencyLimitCh chan struct{}
|
||||
concurrencyLimitChOnce sync.Once
|
||||
)
|
||||
|
||||
func concurrencyLimitChInit() {
|
||||
concurrencyLimitCh = make(chan struct{}, *maxConcurrency)
|
||||
}
|
||||
|
||||
// GetAPIResponse returns response for the given absolute path.
|
||||
func (c *Client) GetAPIResponse(path string) ([]byte, error) {
|
||||
// Limit the number of concurrent API requests.
|
||||
concurrencyLimitChOnce.Do(concurrencyLimitChInit)
|
||||
t := timerpool.Get(*maxWaitTime)
|
||||
select {
|
||||
case concurrencyLimitCh <- struct{}{}:
|
||||
timerpool.Put(t)
|
||||
case <-t.C:
|
||||
timerpool.Put(t)
|
||||
return nil, fmt.Errorf("too many outstanding requests to %q; try increasing -promscrape.discovery.concurrentWaitTime=%s or -promscrape.discovery.concurrency=%d",
|
||||
c.apiServer, *maxWaitTime, *maxConcurrency)
|
||||
}
|
||||
defer func() { <-concurrencyLimitCh }()
|
||||
|
||||
requestURL := c.apiServer + path
|
||||
var u fasthttp.URI
|
||||
u.Update(requestURL)
|
||||
|
||||
@@ -3,6 +3,8 @@ package discoveryutils
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
)
|
||||
|
||||
// ConfigMap is a map for storing discovery api configs.
|
||||
@@ -37,7 +39,7 @@ func (cm *ConfigMap) Get(key interface{}, newConfig func() (interface{}, error))
|
||||
|
||||
e := cm.m[key]
|
||||
if e != nil {
|
||||
e.lastAccessTime = time.Now()
|
||||
e.lastAccessTime = fasttime.UnixTimestamp()
|
||||
return e.cfg, nil
|
||||
}
|
||||
cfg, err := newConfig()
|
||||
@@ -46,17 +48,18 @@ func (cm *ConfigMap) Get(key interface{}, newConfig func() (interface{}, error))
|
||||
}
|
||||
cm.m[key] = &configMapEntry{
|
||||
cfg: cfg,
|
||||
lastAccessTime: time.Now(),
|
||||
lastAccessTime: fasttime.UnixTimestamp(),
|
||||
}
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
func (cm *ConfigMap) cleaner() {
|
||||
tc := time.NewTicker(15 * time.Minute)
|
||||
for currentTime := range tc.C {
|
||||
for range tc.C {
|
||||
currentTime := fasttime.UnixTimestamp()
|
||||
cm.mu.Lock()
|
||||
for k, e := range cm.m {
|
||||
if currentTime.Sub(e.lastAccessTime) > 10*time.Minute {
|
||||
if currentTime-e.lastAccessTime > 10*60 {
|
||||
delete(cm.m, k)
|
||||
}
|
||||
}
|
||||
@@ -66,5 +69,5 @@ func (cm *ConfigMap) cleaner() {
|
||||
|
||||
type configMapEntry struct {
|
||||
cfg interface{}
|
||||
lastAccessTime time.Time
|
||||
lastAccessTime uint64
|
||||
}
|
||||
|
||||
@@ -37,6 +37,15 @@ var (
|
||||
"See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config for details")
|
||||
)
|
||||
|
||||
// CheckConfig checks -promscrape.config for errors and unsupported options.
|
||||
func CheckConfig() error {
|
||||
if *promscrapeConfigFile == "" {
|
||||
return fmt.Errorf("missing -promscrape.config option")
|
||||
}
|
||||
_, _, err := loadConfig(*promscrapeConfigFile)
|
||||
return err
|
||||
}
|
||||
|
||||
// Init initializes Prometheus scraper with config from the `-promscrape.config`.
|
||||
//
|
||||
// Scraped data is passed to pushData.
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
@@ -69,7 +70,7 @@ func (ctx *streamContext) Read(r io.Reader) bool {
|
||||
rows := ctx.Rows.Rows
|
||||
|
||||
// Fill missing timestamps with the current timestamp rounded to seconds.
|
||||
currentTimestamp := time.Now().Unix()
|
||||
currentTimestamp := int64(fasttime.UnixTimestamp())
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
if r.Timestamp == 0 {
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
@@ -68,7 +69,7 @@ func (ctx *streamContext) Read(r io.Reader) bool {
|
||||
rows := ctx.Rows.Rows
|
||||
|
||||
// Fill in missing timestamps
|
||||
currentTimestamp := time.Now().Unix()
|
||||
currentTimestamp := int64(fasttime.UnixTimestamp())
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
if r.Timestamp == 0 {
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
@@ -67,7 +68,7 @@ func ParseStream(req *http.Request, callback func(rows []Row) error) error {
|
||||
rows := ctx.Rows.Rows
|
||||
|
||||
// Fill in missing timestamps
|
||||
currentTimestamp := time.Now().Unix()
|
||||
currentTimestamp := int64(fasttime.UnixTimestamp())
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
if r.Timestamp == 0 {
|
||||
|
||||
@@ -189,7 +189,7 @@ func unmarshalBlockHeaders(dst []blockHeader, src []byte, blockHeadersCount int)
|
||||
for len(src) > 0 {
|
||||
tmp, err := bh.Unmarshal(src)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot unmarshal block header: %s", err)
|
||||
return dst, fmt.Errorf("cannot unmarshal block header: %s", err)
|
||||
}
|
||||
src = tmp
|
||||
dst = append(dst, bh)
|
||||
@@ -199,12 +199,12 @@ func unmarshalBlockHeaders(dst []blockHeader, src []byte, blockHeadersCount int)
|
||||
|
||||
// Verify the number of read block headers.
|
||||
if len(newBHS) != blockHeadersCount {
|
||||
return nil, fmt.Errorf("invalid number of block headers found: %d; want %d block headers", len(newBHS), blockHeadersCount)
|
||||
return dst, fmt.Errorf("invalid number of block headers found: %d; want %d block headers", len(newBHS), blockHeadersCount)
|
||||
}
|
||||
|
||||
// Verify that block headers are sorted by tsid.
|
||||
if !sort.SliceIsSorted(newBHS, func(i, j int) bool { return newBHS[i].TSID.Less(&newBHS[j].TSID) }) {
|
||||
return nil, fmt.Errorf("block headers must be sorted by tsid; unmarshaled unsorted block headers: %+v", newBHS)
|
||||
return dst, fmt.Errorf("block headers must be sorted by tsid; unmarshaled unsorted block headers: %+v", newBHS)
|
||||
}
|
||||
|
||||
return dst, nil
|
||||
|
||||
@@ -71,7 +71,10 @@ func (bsw *blockStreamWriter) reset() {
|
||||
// InitFromInmemoryPart initialzes bsw from inmemory part.
|
||||
func (bsw *blockStreamWriter) InitFromInmemoryPart(mp *inmemoryPart) {
|
||||
bsw.reset()
|
||||
bsw.compressLevel = 0
|
||||
|
||||
// Use the minimum compression level for in-memory blocks,
|
||||
// since they are going to be re-compressed during the merge into file-based blocks.
|
||||
bsw.compressLevel = -5 // See https://github.com/facebook/zstd/releases/tag/v1.3.4
|
||||
|
||||
bsw.timestampsWriter = &mp.timestampsData
|
||||
bsw.valuesWriter = &mp.valuesData
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
@@ -633,7 +634,7 @@ func (db *indexDB) generateTSID(dst *TSID, metricName []byte, mn *MetricName) er
|
||||
if len(mn.Tags) > 1 {
|
||||
dst.InstanceID = uint32(xxhash.Sum64(mn.Tags[1].Value))
|
||||
}
|
||||
dst.MetricID = getUniqueUint64()
|
||||
dst.MetricID = generateUniqueMetricID()
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1233,7 +1234,7 @@ func (db *indexDB) updateDeletedMetricIDs(metricIDs *uint64set.Set) {
|
||||
}
|
||||
|
||||
func (is *indexSearch) getStartDateForPerDayInvertedIndex() (uint64, error) {
|
||||
minDate := uint64(timestampFromTime(time.Now())) / msecPerDay
|
||||
minDate := fasttime.UnixDate()
|
||||
kb := &is.kb
|
||||
ts := &is.ts
|
||||
kb.B = append(kb.B[:0], nsPrefixDateTagToMetricIDs)
|
||||
@@ -1866,7 +1867,7 @@ func (is *indexSearch) searchMetricIDs(tfss []*TagFilters, tr TimeRange, maxMetr
|
||||
return nil, err
|
||||
}
|
||||
if metricIDs.Len() > maxMetrics {
|
||||
return nil, fmt.Errorf("the number or unique timeseries exceeds %d; either narrow down the search or increase -search.maxUniqueTimeseries", maxMetrics)
|
||||
return nil, fmt.Errorf("the number of unique timeseries exceeds %d; either narrow down the search or increase -search.maxUniqueTimeseries", maxMetrics)
|
||||
}
|
||||
// Stop the iteration, since we cannot find more metric ids with the remaining tfss.
|
||||
break
|
||||
@@ -1875,7 +1876,7 @@ func (is *indexSearch) searchMetricIDs(tfss []*TagFilters, tr TimeRange, maxMetr
|
||||
return nil, err
|
||||
}
|
||||
if metricIDs.Len() > maxMetrics {
|
||||
return nil, fmt.Errorf("the number or matching unique timeseries exceeds %d; either narrow down the search or increase -search.maxUniqueTimeseries", maxMetrics)
|
||||
return nil, fmt.Errorf("the number of matching unique timeseries exceeds %d; either narrow down the search or increase -search.maxUniqueTimeseries", maxMetrics)
|
||||
}
|
||||
}
|
||||
if metricIDs.Len() == 0 {
|
||||
@@ -2529,19 +2530,7 @@ func (is *indexSearch) getMetricIDsForRecentHours(tr TimeRange, maxMetrics int)
|
||||
return nil, false
|
||||
}
|
||||
|
||||
func (db *indexDB) storeDateMetricID(date, metricID uint64) error {
|
||||
is := db.getIndexSearch()
|
||||
ok, err := is.hasDateMetricID(date, metricID)
|
||||
db.putIndexSearch(is)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if ok {
|
||||
// Fast path: the (date, metricID) entry already exists in the db.
|
||||
return nil
|
||||
}
|
||||
|
||||
// Slow path: create (date, metricID) entries.
|
||||
func (is *indexSearch) storeDateMetricID(date, metricID uint64) error {
|
||||
items := getIndexItems()
|
||||
defer putIndexItems(items)
|
||||
|
||||
@@ -2555,12 +2544,16 @@ func (db *indexDB) storeDateMetricID(date, metricID uint64) error {
|
||||
defer kbPool.Put(kb)
|
||||
mn := GetMetricName()
|
||||
defer PutMetricName(mn)
|
||||
kb.B, err = db.searchMetricName(kb.B[:0], metricID)
|
||||
var err error
|
||||
// There is no need in searching for metric name in is.db.extDB,
|
||||
// Since the storeDateMetricID function is called only after the metricID->metricName
|
||||
// is added into the current is.db.
|
||||
kb.B, err = is.searchMetricName(kb.B[:0], metricID)
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
logger.Errorf("missing metricName by metricID %d; this could be the case after unclean shutdown; "+
|
||||
"deleting the metricID, so it could be re-created next time", metricID)
|
||||
if err := db.deleteMetricIDs([]uint64{metricID}); err != nil {
|
||||
if err := is.db.deleteMetricIDs([]uint64{metricID}); err != nil {
|
||||
return fmt.Errorf("cannot delete metricID %d after unclean shutdown: %s", metricID, err)
|
||||
}
|
||||
return nil
|
||||
@@ -2585,8 +2578,7 @@ func (db *indexDB) storeDateMetricID(date, metricID uint64) error {
|
||||
items.B = encoding.MarshalUint64(items.B, metricID)
|
||||
items.Next()
|
||||
}
|
||||
|
||||
if err = db.tb.AddItems(items.Items); err != nil {
|
||||
if err = is.db.tb.AddItems(items.Items); err != nil {
|
||||
return fmt.Errorf("cannot add per-day entires for metricID %d: %s", metricID, err)
|
||||
}
|
||||
return nil
|
||||
@@ -2785,14 +2777,17 @@ func (is *indexSearch) intersectMetricIDsWithTagFilterNocache(tf *tagFilter, fil
|
||||
var kbPool bytesutil.ByteBufferPool
|
||||
|
||||
// Returns local unique MetricID.
|
||||
func getUniqueUint64() uint64 {
|
||||
return atomic.AddUint64(&uniqueUint64, 1)
|
||||
func generateUniqueMetricID() uint64 {
|
||||
// It is expected that metricIDs returned from this function must be dense.
|
||||
// If they will be sparse, then this may hurt metric_ids intersection
|
||||
// performance with uint64set.Set.
|
||||
return atomic.AddUint64(&nextUniqueMetricID, 1)
|
||||
}
|
||||
|
||||
// This number mustn't go backwards on restarts, otherwise metricID
|
||||
// collisions are possible. So don't change time on the server
|
||||
// between VictoriaMetrics restarts.
|
||||
var uniqueUint64 = uint64(time.Now().UnixNano())
|
||||
var nextUniqueMetricID = uint64(time.Now().UnixNano())
|
||||
|
||||
func marshalCommonPrefix(dst []byte, nsPrefix byte) []byte {
|
||||
dst = append(dst, nsPrefix)
|
||||
|
||||
@@ -685,7 +685,7 @@ func testIndexDBGetOrCreateTSIDByName(db *indexDB, metricGroups int) ([]MetricNa
|
||||
date := uint64(timestampFromTime(time.Now())) / msecPerDay
|
||||
for i := range tsids {
|
||||
tsid := &tsids[i]
|
||||
if err := db.storeDateMetricID(date, tsid.MetricID); err != nil {
|
||||
if err := is.storeDateMetricID(date, tsid.MetricID); err != nil {
|
||||
return nil, nil, fmt.Errorf("error in storeDateMetricID(%d, %d): %s", date, tsid.MetricID, err)
|
||||
}
|
||||
}
|
||||
@@ -1476,7 +1476,7 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
||||
date := baseDate - uint64(day*msecPerDay)
|
||||
for i := range tsids {
|
||||
tsid := &tsids[i]
|
||||
if err := db.storeDateMetricID(date, tsid.MetricID); err != nil {
|
||||
if err := is.storeDateMetricID(date, tsid.MetricID); err != nil {
|
||||
t.Fatalf("error in storeDateMetricID(%d, %d): %s", date, tsid.MetricID, err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,9 +2,9 @@ package storage
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
@@ -17,7 +17,7 @@ type inmemoryPart struct {
|
||||
indexData bytesutil.ByteBuffer
|
||||
metaindexData bytesutil.ByteBuffer
|
||||
|
||||
creationTime time.Time
|
||||
creationTime uint64
|
||||
}
|
||||
|
||||
// Reset resets mp.
|
||||
@@ -29,7 +29,7 @@ func (mp *inmemoryPart) Reset() {
|
||||
mp.indexData.Reset()
|
||||
mp.metaindexData.Reset()
|
||||
|
||||
mp.creationTime = time.Time{}
|
||||
mp.creationTime = 0
|
||||
}
|
||||
|
||||
// InitFromRows initializes mp from the given rows.
|
||||
@@ -42,7 +42,7 @@ func (mp *inmemoryPart) InitFromRows(rows []rawRow) {
|
||||
rrm := getRawRowsMarshaler()
|
||||
rrm.marshalToInmemoryPart(mp, rows)
|
||||
putRawRowsMarshaler(rrm)
|
||||
mp.creationTime = time.Now()
|
||||
mp.creationTime = fasttime.UnixTimestamp()
|
||||
}
|
||||
|
||||
// NewPart creates new part from mp.
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
@@ -117,6 +118,7 @@ func newPart(ph *partHeader, path string, size uint64, metaindexReader filestrea
|
||||
p.indexFile = indexFile
|
||||
|
||||
p.metaindex = metaindex
|
||||
p.ibCache = newIndexBlockCache()
|
||||
|
||||
if len(errors) > 0 {
|
||||
// Return only the first error, since it has no sense in returning all errors.
|
||||
@@ -125,8 +127,6 @@ func newPart(ph *partHeader, path string, size uint64, metaindexReader filestrea
|
||||
return nil, err
|
||||
}
|
||||
|
||||
p.ibCache = newIndexBlockCache()
|
||||
|
||||
return &p, nil
|
||||
}
|
||||
|
||||
@@ -229,7 +229,7 @@ func (ibc *indexBlockCache) cleaner() {
|
||||
}
|
||||
|
||||
func (ibc *indexBlockCache) cleanByTimeout() {
|
||||
currentTime := atomic.LoadUint64(¤tTimestamp)
|
||||
currentTime := fasttime.UnixTimestamp()
|
||||
ibc.mu.Lock()
|
||||
for k, ibe := range ibc.m {
|
||||
// Delete items accessed more than 10 minutes ago.
|
||||
@@ -248,7 +248,7 @@ func (ibc *indexBlockCache) Get(k uint64) *indexBlock {
|
||||
ibc.mu.RUnlock()
|
||||
|
||||
if ibe != nil {
|
||||
currentTime := atomic.LoadUint64(¤tTimestamp)
|
||||
currentTime := fasttime.UnixTimestamp()
|
||||
if atomic.LoadUint64(&ibe.lastAccessTime) != currentTime {
|
||||
atomic.StoreUint64(&ibe.lastAccessTime, currentTime)
|
||||
}
|
||||
@@ -258,7 +258,7 @@ func (ibc *indexBlockCache) Get(k uint64) *indexBlock {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ibc *indexBlockCache) Put(k uint64, ib *indexBlock) bool {
|
||||
func (ibc *indexBlockCache) Put(k uint64, ib *indexBlock) {
|
||||
ibc.mu.Lock()
|
||||
|
||||
// Clean superflouos cache entries.
|
||||
@@ -277,12 +277,11 @@ func (ibc *indexBlockCache) Put(k uint64, ib *indexBlock) bool {
|
||||
|
||||
// Store frequently requested ib in the cache.
|
||||
ibe := &indexBlockCacheEntry{
|
||||
lastAccessTime: atomic.LoadUint64(¤tTimestamp),
|
||||
lastAccessTime: fasttime.UnixTimestamp(),
|
||||
ib: ib,
|
||||
}
|
||||
ibc.m[k] = ibe
|
||||
ibc.mu.Unlock()
|
||||
return true
|
||||
}
|
||||
|
||||
func (ibc *indexBlockCache) Requests() uint64 {
|
||||
@@ -299,16 +298,3 @@ func (ibc *indexBlockCache) Len() uint64 {
|
||||
ibc.mu.Unlock()
|
||||
return n
|
||||
}
|
||||
|
||||
func init() {
|
||||
go func() {
|
||||
ticker := time.NewTicker(time.Second)
|
||||
defer ticker.Stop()
|
||||
for tm := range ticker.C {
|
||||
t := uint64(tm.Unix())
|
||||
atomic.StoreUint64(¤tTimestamp, t)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
var currentTimestamp uint64
|
||||
|
||||
@@ -36,9 +36,6 @@ type partSearch struct {
|
||||
|
||||
bhs []blockHeader
|
||||
|
||||
// Pointer to index block, which may be reused
|
||||
indexBlockReuse *indexBlock
|
||||
|
||||
compressedIndexBuf []byte
|
||||
indexBuf []byte
|
||||
|
||||
@@ -53,10 +50,6 @@ func (ps *partSearch) reset() {
|
||||
ps.metaindex = nil
|
||||
ps.ibCache = nil
|
||||
ps.bhs = nil
|
||||
if ps.indexBlockReuse != nil {
|
||||
putIndexBlock(ps.indexBlockReuse)
|
||||
ps.indexBlockReuse = nil
|
||||
}
|
||||
ps.compressedIndexBuf = ps.compressedIndexBuf[:0]
|
||||
ps.indexBuf = ps.indexBuf[:0]
|
||||
ps.err = nil
|
||||
@@ -161,10 +154,6 @@ func (ps *partSearch) nextBHS() bool {
|
||||
|
||||
// Found the index block which may contain the required data
|
||||
// for the ps.BlockRef.bh.TSID and the given timestamp range.
|
||||
if ps.indexBlockReuse != nil {
|
||||
putIndexBlock(ps.indexBlockReuse)
|
||||
ps.indexBlockReuse = nil
|
||||
}
|
||||
indexBlockKey := mr.IndexBlockOffset
|
||||
ib := ps.ibCache.Get(indexBlockKey)
|
||||
if ib == nil {
|
||||
@@ -176,9 +165,7 @@ func (ps *partSearch) nextBHS() bool {
|
||||
&ps.p.ph, mr.IndexBlockOffset, mr.IndexBlockSize, err)
|
||||
return false
|
||||
}
|
||||
if ok := ps.ibCache.Put(indexBlockKey, ib); !ok {
|
||||
ps.indexBlockReuse = ib
|
||||
}
|
||||
ps.ibCache.Put(indexBlockKey, ib)
|
||||
}
|
||||
ps.bhs = ib.bhs
|
||||
return true
|
||||
|
||||
@@ -16,6 +16,7 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
@@ -445,7 +446,7 @@ func (rrs *rawRowsShards) Len() int {
|
||||
type rawRowsShard struct {
|
||||
lock sync.Mutex
|
||||
rows []rawRow
|
||||
lastFlushTime time.Time
|
||||
lastFlushTime uint64
|
||||
}
|
||||
|
||||
func (rrs *rawRowsShard) Len() int {
|
||||
@@ -478,7 +479,7 @@ func (rrs *rawRowsShard) addRows(pt *partition, rows []rawRow) {
|
||||
rr := getRawRowsMaxSize()
|
||||
rrs.rows, rr.rows = rr.rows, rrs.rows
|
||||
rrss = append(rrss, rr)
|
||||
rrs.lastFlushTime = time.Now()
|
||||
rrs.lastFlushTime = fasttime.UnixTimestamp()
|
||||
}
|
||||
rrs.lock.Unlock()
|
||||
|
||||
@@ -722,10 +723,14 @@ func (rrs *rawRowsShards) flush(pt *partition, isFinal bool) {
|
||||
|
||||
func (rrs *rawRowsShard) flush(pt *partition, isFinal bool) {
|
||||
var rr *rawRows
|
||||
currentTime := time.Now()
|
||||
currentTime := fasttime.UnixTimestamp()
|
||||
flushSeconds := int64(rawRowsFlushInterval.Seconds())
|
||||
if flushSeconds <= 0 {
|
||||
flushSeconds = 1
|
||||
}
|
||||
|
||||
rrs.lock.Lock()
|
||||
if isFinal || currentTime.Sub(rrs.lastFlushTime) > rawRowsFlushInterval {
|
||||
if isFinal || currentTime-rrs.lastFlushTime > uint64(flushSeconds) {
|
||||
rr = getRawRowsMaxSize()
|
||||
rrs.rows, rr.rows = rr.rows, rrs.rows
|
||||
}
|
||||
@@ -764,7 +769,11 @@ func (pt *partition) inmemoryPartsFlusher() {
|
||||
}
|
||||
|
||||
func (pt *partition) flushInmemoryParts(dstPws []*partWrapper, force bool) ([]*partWrapper, error) {
|
||||
currentTime := time.Now()
|
||||
currentTime := fasttime.UnixTimestamp()
|
||||
flushSeconds := int64(inmemoryPartsFlushInterval.Seconds())
|
||||
if flushSeconds <= 0 {
|
||||
flushSeconds = 1
|
||||
}
|
||||
|
||||
// Inmemory parts may present only in small parts.
|
||||
pt.partsLock.Lock()
|
||||
@@ -772,7 +781,7 @@ func (pt *partition) flushInmemoryParts(dstPws []*partWrapper, force bool) ([]*p
|
||||
if pw.mp == nil || pw.isInMerge {
|
||||
continue
|
||||
}
|
||||
if force || currentTime.Sub(pw.mp.creationTime) >= inmemoryPartsFlushInterval {
|
||||
if force || currentTime-pw.mp.creationTime >= uint64(flushSeconds) {
|
||||
pw.isInMerge = true
|
||||
dstPws = append(dstPws, pw)
|
||||
}
|
||||
@@ -876,7 +885,7 @@ const (
|
||||
|
||||
func (pt *partition) partsMerger(mergerFunc func(isFinal bool) error) error {
|
||||
sleepTime := minMergeSleepTime
|
||||
var lastMergeTime time.Time
|
||||
var lastMergeTime uint64
|
||||
isFinal := false
|
||||
t := time.NewTimer(sleepTime)
|
||||
for {
|
||||
@@ -884,7 +893,7 @@ func (pt *partition) partsMerger(mergerFunc func(isFinal bool) error) error {
|
||||
if err == nil {
|
||||
// Try merging additional parts.
|
||||
sleepTime = minMergeSleepTime
|
||||
lastMergeTime = time.Now()
|
||||
lastMergeTime = fasttime.UnixTimestamp()
|
||||
isFinal = false
|
||||
continue
|
||||
}
|
||||
@@ -895,10 +904,10 @@ func (pt *partition) partsMerger(mergerFunc func(isFinal bool) error) error {
|
||||
if err != errNothingToMerge {
|
||||
return err
|
||||
}
|
||||
if time.Since(lastMergeTime) > 30*time.Second {
|
||||
if fasttime.UnixTimestamp()-lastMergeTime > 30 {
|
||||
// We have free time for merging into bigger parts.
|
||||
// This should improve select performance.
|
||||
lastMergeTime = time.Now()
|
||||
lastMergeTime = fasttime.UnixTimestamp()
|
||||
isFinal = true
|
||||
continue
|
||||
}
|
||||
@@ -939,7 +948,7 @@ func mustGetFreeDiskSpace(path string) uint64 {
|
||||
defer freeSpaceMapLock.Unlock()
|
||||
|
||||
e, ok := freeSpaceMap[path]
|
||||
if ok && time.Since(e.updateTime) < time.Second {
|
||||
if ok && fasttime.UnixTimestamp()-e.updateTime < 2 {
|
||||
// Fast path - the entry is fresh.
|
||||
return e.freeSpace
|
||||
}
|
||||
@@ -947,7 +956,7 @@ func mustGetFreeDiskSpace(path string) uint64 {
|
||||
// Slow path.
|
||||
// Determine the amount of free space on bigPartsPath.
|
||||
e.freeSpace = fs.MustGetFreeSpace(path)
|
||||
e.updateTime = time.Now()
|
||||
e.updateTime = fasttime.UnixTimestamp()
|
||||
freeSpaceMap[path] = e
|
||||
return e.freeSpace
|
||||
}
|
||||
@@ -958,7 +967,7 @@ var (
|
||||
)
|
||||
|
||||
type freeSpaceEntry struct {
|
||||
updateTime time.Time
|
||||
updateTime uint64
|
||||
freeSpace uint64
|
||||
}
|
||||
|
||||
@@ -1060,8 +1069,10 @@ func (pt *partition) mergeParts(pws []*partWrapper, stopCh <-chan struct{}) erro
|
||||
}
|
||||
|
||||
outRowsCount := uint64(0)
|
||||
outBlocksCount := uint64(0)
|
||||
for _, pw := range pws {
|
||||
outRowsCount += pw.p.ph.RowsCount
|
||||
outBlocksCount += pw.p.ph.BlocksCount
|
||||
}
|
||||
isBigPart := outRowsCount > maxRowsPerSmallPart()
|
||||
nocache := isBigPart
|
||||
@@ -1075,7 +1086,7 @@ func (pt *partition) mergeParts(pws []*partWrapper, stopCh <-chan struct{}) erro
|
||||
mergeIdx := pt.nextMergeIdx()
|
||||
tmpPartPath := fmt.Sprintf("%s/tmp/%016X", ptPath, mergeIdx)
|
||||
bsw := getBlockStreamWriter()
|
||||
compressLevel := getCompressLevelForRowsCount(outRowsCount)
|
||||
compressLevel := getCompressLevelForRowsCount(outRowsCount, outBlocksCount)
|
||||
if err := bsw.InitFromFilePart(tmpPartPath, nocache, compressLevel); err != nil {
|
||||
return fmt.Errorf("cannot create destination part %q: %s", tmpPartPath, err)
|
||||
}
|
||||
@@ -1176,24 +1187,28 @@ func (pt *partition) mergeParts(pws []*partWrapper, stopCh <-chan struct{}) erro
|
||||
|
||||
d := time.Since(startTime)
|
||||
if d > 10*time.Second {
|
||||
logger.Infof("merged %d rows in %.3f seconds at %d rows/sec to %q; sizeBytes: %d",
|
||||
outRowsCount, d.Seconds(), int(float64(outRowsCount)/d.Seconds()), dstPartPath, newPSize)
|
||||
logger.Infof("merged %d rows across %d blocks in %.3f seconds at %d rows/sec to %q; sizeBytes: %d",
|
||||
outRowsCount, outBlocksCount, d.Seconds(), int(float64(outRowsCount)/d.Seconds()), dstPartPath, newPSize)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getCompressLevelForRowsCount(rowsCount uint64) int {
|
||||
if rowsCount <= 1<<19 {
|
||||
func getCompressLevelForRowsCount(rowsCount, blocksCount uint64) int {
|
||||
avgRowsPerBlock := rowsCount / blocksCount
|
||||
if avgRowsPerBlock <= 200 {
|
||||
return -1
|
||||
}
|
||||
if avgRowsPerBlock <= 500 {
|
||||
return 1
|
||||
}
|
||||
if rowsCount <= 1<<22 {
|
||||
if avgRowsPerBlock <= 1000 {
|
||||
return 2
|
||||
}
|
||||
if rowsCount <= 1<<25 {
|
||||
if avgRowsPerBlock <= 2000 {
|
||||
return 3
|
||||
}
|
||||
if rowsCount <= 1<<28 {
|
||||
if avgRowsPerBlock <= 4000 {
|
||||
return 4
|
||||
}
|
||||
return 5
|
||||
|
||||
@@ -161,16 +161,20 @@ func testSearchGeneric(t *testing.T, forcePerDayInvertedIndex bool) {
|
||||
ch <- testSearchInternal(st, tr, mrs, accountsCount)
|
||||
}()
|
||||
}
|
||||
var firstError error
|
||||
for i := 0; i < cap(ch); i++ {
|
||||
select {
|
||||
case err := <-ch:
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
if err != nil && firstError == nil {
|
||||
firstError = err
|
||||
}
|
||||
case <-time.After(10 * time.Second):
|
||||
t.Fatalf("timeout")
|
||||
}
|
||||
}
|
||||
if firstError != nil {
|
||||
t.Fatalf("unexpected error: %s", firstError)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -15,8 +15,8 @@ import (
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
@@ -39,6 +39,10 @@ type Storage struct {
|
||||
addRowsConcurrencyLimitTimeout uint64
|
||||
addRowsConcurrencyDroppedRows uint64
|
||||
|
||||
slowRowInserts uint64
|
||||
slowPerDayIndexInserts uint64
|
||||
slowMetricNameLoads uint64
|
||||
|
||||
path string
|
||||
cachePath string
|
||||
retentionMonths int
|
||||
@@ -68,16 +72,27 @@ type Storage struct {
|
||||
// Fast cache for MetricID values occurred during the previous hour.
|
||||
prevHourMetricIDs atomic.Value
|
||||
|
||||
// Fast cache for pre-populating per-day inverted index for the next day.
|
||||
// This is needed in order to remove CPU usage spikes at 00:00 UTC
|
||||
// due to creation of per-day inverted index for active time series.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/430 for details.
|
||||
nextDayMetricIDs atomic.Value
|
||||
|
||||
// Pending MetricID values to be added to currHourMetricIDs.
|
||||
pendingHourEntriesLock sync.Mutex
|
||||
pendingHourEntries *uint64set.Set
|
||||
|
||||
// Pending MetricIDs to be added to nextDayMetricIDs.
|
||||
pendingNextDayMetricIDsLock sync.Mutex
|
||||
pendingNextDayMetricIDs *uint64set.Set
|
||||
|
||||
// metricIDs for pre-fetched metricNames in the prefetchMetricNames function.
|
||||
prefetchedMetricIDs atomic.Value
|
||||
|
||||
stop chan struct{}
|
||||
|
||||
currHourMetricIDsUpdaterWG sync.WaitGroup
|
||||
nextDayMetricIDsUpdaterWG sync.WaitGroup
|
||||
retentionWatcherWG sync.WaitGroup
|
||||
prefetchedMetricIDsCleanerWG sync.WaitGroup
|
||||
|
||||
@@ -131,13 +146,18 @@ func OpenStorage(path string, retentionMonths int) (*Storage, error) {
|
||||
s.metricNameCache = s.mustLoadCache("MetricID->MetricName", "metricID_metricName", mem/8)
|
||||
s.dateMetricIDCache = newDateMetricIDCache()
|
||||
|
||||
hour := uint64(timestampFromTime(time.Now())) / msecPerHour
|
||||
hour := fasttime.UnixHour()
|
||||
hmCurr := s.mustLoadHourMetricIDs(hour, "curr_hour_metric_ids")
|
||||
hmPrev := s.mustLoadHourMetricIDs(hour-1, "prev_hour_metric_ids")
|
||||
s.currHourMetricIDs.Store(hmCurr)
|
||||
s.prevHourMetricIDs.Store(hmPrev)
|
||||
s.pendingHourEntries = &uint64set.Set{}
|
||||
|
||||
date := fasttime.UnixDate()
|
||||
nextDayMetricIDs := s.mustLoadNextDayMetricIDs(date)
|
||||
s.nextDayMetricIDs.Store(nextDayMetricIDs)
|
||||
s.pendingNextDayMetricIDs = &uint64set.Set{}
|
||||
|
||||
s.prefetchedMetricIDs.Store(&uint64set.Set{})
|
||||
|
||||
// Load indexdb
|
||||
@@ -163,6 +183,7 @@ func OpenStorage(path string, retentionMonths int) (*Storage, error) {
|
||||
s.tb = tb
|
||||
|
||||
s.startCurrHourMetricIDsUpdater()
|
||||
s.startNextDayMetricIDsUpdater()
|
||||
s.startRetentionWatcher()
|
||||
s.startPrefetchedMetricIDsCleaner()
|
||||
|
||||
@@ -306,6 +327,10 @@ type Metrics struct {
|
||||
AddRowsConcurrencyCapacity uint64
|
||||
AddRowsConcurrencyCurrent uint64
|
||||
|
||||
SlowRowInserts uint64
|
||||
SlowPerDayIndexInserts uint64
|
||||
SlowMetricNameLoads uint64
|
||||
|
||||
TSIDCacheSize uint64
|
||||
TSIDCacheSizeBytes uint64
|
||||
TSIDCacheRequests uint64
|
||||
@@ -332,6 +357,9 @@ type Metrics struct {
|
||||
HourMetricIDCacheSize uint64
|
||||
HourMetricIDCacheSizeBytes uint64
|
||||
|
||||
NextDayMetricIDCacheSize uint64
|
||||
NextDayMetricIDCacheSizeBytes uint64
|
||||
|
||||
PrefetchedMetricIDsSize uint64
|
||||
PrefetchedMetricIDsSizeBytes uint64
|
||||
|
||||
@@ -357,6 +385,10 @@ func (s *Storage) UpdateMetrics(m *Metrics) {
|
||||
m.AddRowsConcurrencyCapacity = uint64(cap(addRowsConcurrencyCh))
|
||||
m.AddRowsConcurrencyCurrent = uint64(len(addRowsConcurrencyCh))
|
||||
|
||||
m.SlowRowInserts += atomic.LoadUint64(&s.slowRowInserts)
|
||||
m.SlowPerDayIndexInserts += atomic.LoadUint64(&s.slowPerDayIndexInserts)
|
||||
m.SlowMetricNameLoads += atomic.LoadUint64(&s.slowMetricNameLoads)
|
||||
|
||||
var cs fastcache.Stats
|
||||
s.tsidCache.UpdateStats(&cs)
|
||||
m.TSIDCacheSize += cs.EntriesCount
|
||||
@@ -396,6 +428,10 @@ func (s *Storage) UpdateMetrics(m *Metrics) {
|
||||
m.HourMetricIDCacheSizeBytes += hmCurr.m.SizeBytes()
|
||||
m.HourMetricIDCacheSizeBytes += hmPrev.m.SizeBytes()
|
||||
|
||||
nextDayMetricIDs := &s.nextDayMetricIDs.Load().(*byDateMetricIDEntry).v
|
||||
m.NextDayMetricIDCacheSize += uint64(nextDayMetricIDs.Len())
|
||||
m.NextDayMetricIDCacheSizeBytes += nextDayMetricIDs.SizeBytes()
|
||||
|
||||
prefetchedMetricIDs := s.prefetchedMetricIDs.Load().(*uint64set.Set)
|
||||
m.PrefetchedMetricIDsSize += uint64(prefetchedMetricIDs.Len())
|
||||
m.PrefetchedMetricIDsSizeBytes += uint64(prefetchedMetricIDs.SizeBytes())
|
||||
@@ -453,6 +489,14 @@ func (s *Storage) startCurrHourMetricIDsUpdater() {
|
||||
}()
|
||||
}
|
||||
|
||||
func (s *Storage) startNextDayMetricIDsUpdater() {
|
||||
s.nextDayMetricIDsUpdaterWG.Add(1)
|
||||
go func() {
|
||||
s.nextDayMetricIDsUpdater()
|
||||
s.nextDayMetricIDsUpdaterWG.Done()
|
||||
}()
|
||||
}
|
||||
|
||||
var currHourMetricIDsUpdateInterval = time.Second * 10
|
||||
|
||||
func (s *Storage) currHourMetricIDsUpdater() {
|
||||
@@ -469,6 +513,22 @@ func (s *Storage) currHourMetricIDsUpdater() {
|
||||
}
|
||||
}
|
||||
|
||||
var nextDayMetricIDsUpdateInterval = time.Second * 11
|
||||
|
||||
func (s *Storage) nextDayMetricIDsUpdater() {
|
||||
ticker := time.NewTicker(nextDayMetricIDsUpdateInterval)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-s.stop:
|
||||
s.updateNextDayMetricIDs()
|
||||
return
|
||||
case <-ticker.C:
|
||||
s.updateNextDayMetricIDs()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Storage) mustRotateIndexDB() {
|
||||
// Create new indexdb table.
|
||||
newTableName := nextIndexDBTableName()
|
||||
@@ -500,6 +560,8 @@ func (s *Storage) mustRotateIndexDB() {
|
||||
|
||||
// Do not flush metricIDCache and metricNameCache, since all the metricIDs
|
||||
// from prev idb remain valid after the rotation.
|
||||
|
||||
// There is no need in resetting nextDayMetricIDs, since it should be automatically reset every day.
|
||||
}
|
||||
|
||||
// MustClose closes the storage.
|
||||
@@ -508,6 +570,7 @@ func (s *Storage) MustClose() {
|
||||
|
||||
s.retentionWatcherWG.Wait()
|
||||
s.currHourMetricIDsUpdaterWG.Wait()
|
||||
s.nextDayMetricIDsUpdaterWG.Wait()
|
||||
|
||||
s.tb.MustClose()
|
||||
s.idb().MustClose()
|
||||
@@ -522,21 +585,70 @@ func (s *Storage) MustClose() {
|
||||
hmPrev := s.prevHourMetricIDs.Load().(*hourMetricIDs)
|
||||
s.mustSaveHourMetricIDs(hmPrev, "prev_hour_metric_ids")
|
||||
|
||||
nextDayMetricIDs := s.nextDayMetricIDs.Load().(*byDateMetricIDEntry)
|
||||
s.mustSaveNextDayMetricIDs(nextDayMetricIDs)
|
||||
|
||||
// Release lock file.
|
||||
if err := s.flockF.Close(); err != nil {
|
||||
logger.Panicf("FATAL: cannot close lock file %q: %s", s.flockF.Name(), err)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Storage) mustLoadHourMetricIDs(hour uint64, name string) *hourMetricIDs {
|
||||
func (s *Storage) mustLoadNextDayMetricIDs(date uint64) *byDateMetricIDEntry {
|
||||
e := &byDateMetricIDEntry{
|
||||
date: date,
|
||||
}
|
||||
name := "next_day_metric_ids"
|
||||
path := s.cachePath + "/" + name
|
||||
logger.Infof("loading %s from %q...", name, path)
|
||||
startTime := time.Now()
|
||||
if !fs.IsPathExist(path) {
|
||||
logger.Infof("nothing to load from %q", path)
|
||||
return &hourMetricIDs{
|
||||
hour: hour,
|
||||
}
|
||||
return e
|
||||
}
|
||||
src, err := ioutil.ReadFile(path)
|
||||
if err != nil {
|
||||
logger.Panicf("FATAL: cannot read %s: %s", path, err)
|
||||
}
|
||||
srcOrigLen := len(src)
|
||||
if len(src) < 16 {
|
||||
logger.Errorf("discarding %s, since it has broken header; got %d bytes; want %d bytes", path, len(src), 16)
|
||||
return e
|
||||
}
|
||||
|
||||
// Unmarshal header
|
||||
dateLoaded := encoding.UnmarshalUint64(src)
|
||||
src = src[8:]
|
||||
if dateLoaded != date {
|
||||
logger.Infof("discarding %s, since it contains data for stale date; got %d; want %d", path, dateLoaded, date)
|
||||
return e
|
||||
}
|
||||
|
||||
// Unmarshal uint64set
|
||||
m, tail, err := unmarshalUint64Set(src)
|
||||
if err != nil {
|
||||
logger.Infof("discarding %s because cannot load uint64set: %s", path, err)
|
||||
return e
|
||||
}
|
||||
if len(tail) > 0 {
|
||||
logger.Infof("discarding %s because non-empty tail left; len(tail)=%d", path, len(tail))
|
||||
return e
|
||||
}
|
||||
e.v = *m
|
||||
logger.Infof("loaded %s from %q in %.3f seconds; entriesCount: %d; sizeBytes: %d", name, path, time.Since(startTime).Seconds(), m.Len(), srcOrigLen)
|
||||
return e
|
||||
}
|
||||
|
||||
func (s *Storage) mustLoadHourMetricIDs(hour uint64, name string) *hourMetricIDs {
|
||||
hm := &hourMetricIDs{
|
||||
hour: hour,
|
||||
}
|
||||
path := s.cachePath + "/" + name
|
||||
logger.Infof("loading %s from %q...", name, path)
|
||||
startTime := time.Now()
|
||||
if !fs.IsPathExist(path) {
|
||||
logger.Infof("nothing to load from %q", path)
|
||||
return hm
|
||||
}
|
||||
src, err := ioutil.ReadFile(path)
|
||||
if err != nil {
|
||||
@@ -545,9 +657,7 @@ func (s *Storage) mustLoadHourMetricIDs(hour uint64, name string) *hourMetricIDs
|
||||
srcOrigLen := len(src)
|
||||
if len(src) < 24 {
|
||||
logger.Errorf("discarding %s, since it has broken header; got %d bytes; want %d bytes", path, len(src), 24)
|
||||
return &hourMetricIDs{
|
||||
hour: hour,
|
||||
}
|
||||
return hm
|
||||
}
|
||||
|
||||
// Unmarshal header
|
||||
@@ -556,34 +666,43 @@ func (s *Storage) mustLoadHourMetricIDs(hour uint64, name string) *hourMetricIDs
|
||||
hourLoaded := encoding.UnmarshalUint64(src)
|
||||
src = src[8:]
|
||||
if hourLoaded != hour {
|
||||
logger.Infof("discarding %s, since it contains outdated hour; got %d; want %d", name, hourLoaded, hour)
|
||||
return &hourMetricIDs{
|
||||
hour: hour,
|
||||
}
|
||||
logger.Infof("discarding %s, since it contains outdated hour; got %d; want %d", path, hourLoaded, hour)
|
||||
return hm
|
||||
}
|
||||
|
||||
// Unmarshal hm.m
|
||||
hmLen := encoding.UnmarshalUint64(src)
|
||||
src = src[8:]
|
||||
if uint64(len(src)) < 8*hmLen {
|
||||
logger.Errorf("discarding %s, since it has broken hm.m data; got %d bytes; want at least %d bytes", path, len(src), 8*hmLen)
|
||||
return &hourMetricIDs{
|
||||
hour: hour,
|
||||
}
|
||||
// Unmarshal uint64set
|
||||
m, tail, err := unmarshalUint64Set(src)
|
||||
if err != nil {
|
||||
logger.Infof("discarding %s because cannot load uint64set: %s", path, err)
|
||||
return hm
|
||||
}
|
||||
m := &uint64set.Set{}
|
||||
for i := uint64(0); i < hmLen; i++ {
|
||||
metricID := encoding.UnmarshalUint64(src)
|
||||
src = src[8:]
|
||||
m.Add(metricID)
|
||||
if len(tail) > 0 {
|
||||
logger.Infof("discarding %s because non-empty tail left; len(tail)=%d", path, len(tail))
|
||||
return hm
|
||||
}
|
||||
hm.m = m
|
||||
hm.isFull = isFull != 0
|
||||
logger.Infof("loaded %s from %q in %.3f seconds; entriesCount: %d; sizeBytes: %d", name, path, time.Since(startTime).Seconds(), m.Len(), srcOrigLen)
|
||||
return hm
|
||||
}
|
||||
|
||||
logger.Infof("loaded %s from %q in %.3f seconds; entriesCount: %d; sizeBytes: %d", name, path, time.Since(startTime).Seconds(), hmLen, srcOrigLen)
|
||||
return &hourMetricIDs{
|
||||
m: m,
|
||||
hour: hourLoaded,
|
||||
isFull: isFull != 0,
|
||||
func (s *Storage) mustSaveNextDayMetricIDs(e *byDateMetricIDEntry) {
|
||||
name := "next_day_metric_ids"
|
||||
path := s.cachePath + "/" + name
|
||||
logger.Infof("saving %s to %q...", name, path)
|
||||
startTime := time.Now()
|
||||
dst := make([]byte, 0, e.v.Len()*8+16)
|
||||
|
||||
// Marshal header
|
||||
dst = encoding.MarshalUint64(dst, e.date)
|
||||
|
||||
// Marshal e.v
|
||||
dst = marshalUint64Set(dst, &e.v)
|
||||
|
||||
if err := ioutil.WriteFile(path, dst, 0644); err != nil {
|
||||
logger.Panicf("FATAL: cannot write %d bytes to %q: %s", len(dst), path, err)
|
||||
}
|
||||
logger.Infof("saved %s to %q in %.3f seconds; entriesCount: %d; sizeBytes: %d", name, path, time.Since(startTime).Seconds(), e.v.Len(), len(dst))
|
||||
}
|
||||
|
||||
func (s *Storage) mustSaveHourMetricIDs(hm *hourMetricIDs, name string) {
|
||||
@@ -601,13 +720,7 @@ func (s *Storage) mustSaveHourMetricIDs(hm *hourMetricIDs, name string) {
|
||||
dst = encoding.MarshalUint64(dst, hm.hour)
|
||||
|
||||
// Marshal hm.m
|
||||
dst = encoding.MarshalUint64(dst, uint64(hm.m.Len()))
|
||||
hm.m.ForEach(func(part []uint64) bool {
|
||||
for _, metricID := range part {
|
||||
dst = encoding.MarshalUint64(dst, metricID)
|
||||
}
|
||||
return true
|
||||
})
|
||||
dst = marshalUint64Set(dst, hm.m)
|
||||
|
||||
if err := ioutil.WriteFile(path, dst, 0644); err != nil {
|
||||
logger.Panicf("FATAL: cannot write %d bytes to %q: %s", len(dst), path, err)
|
||||
@@ -615,6 +728,32 @@ func (s *Storage) mustSaveHourMetricIDs(hm *hourMetricIDs, name string) {
|
||||
logger.Infof("saved %s to %q in %.3f seconds; entriesCount: %d; sizeBytes: %d", name, path, time.Since(startTime).Seconds(), hm.m.Len(), len(dst))
|
||||
}
|
||||
|
||||
func unmarshalUint64Set(src []byte) (*uint64set.Set, []byte, error) {
|
||||
mLen := encoding.UnmarshalUint64(src)
|
||||
src = src[8:]
|
||||
if uint64(len(src)) < 8*mLen {
|
||||
return nil, nil, fmt.Errorf("cannot unmarshal uint64set; got %d bytes; want at least %d bytes", len(src), 8*mLen)
|
||||
}
|
||||
m := &uint64set.Set{}
|
||||
for i := uint64(0); i < mLen; i++ {
|
||||
metricID := encoding.UnmarshalUint64(src)
|
||||
src = src[8:]
|
||||
m.Add(metricID)
|
||||
}
|
||||
return m, src, nil
|
||||
}
|
||||
|
||||
func marshalUint64Set(dst []byte, m *uint64set.Set) []byte {
|
||||
dst = encoding.MarshalUint64(dst, uint64(m.Len()))
|
||||
m.ForEach(func(part []uint64) bool {
|
||||
for _, metricID := range part {
|
||||
dst = encoding.MarshalUint64(dst, metricID)
|
||||
}
|
||||
return true
|
||||
})
|
||||
return dst
|
||||
}
|
||||
|
||||
func (s *Storage) mustLoadCache(info, name string, sizeBytes int) *workingsetcache.Cache {
|
||||
path := s.cachePath + "/" + name
|
||||
logger.Infof("loading %s cache from %q...", info, path)
|
||||
@@ -682,6 +821,7 @@ func (s *Storage) prefetchMetricNames(tsids []TSID) error {
|
||||
// It is cheaper to skip pre-fetching and obtain metricNames inline.
|
||||
return nil
|
||||
}
|
||||
atomic.AddUint64(&s.slowMetricNameLoads, uint64(len(metricIDs)))
|
||||
|
||||
// Pre-fetch metricIDs.
|
||||
sort.Sort(metricIDs)
|
||||
@@ -891,10 +1031,6 @@ var (
|
||||
)
|
||||
|
||||
func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]rawRow, error) {
|
||||
var is *indexSearch
|
||||
var mn *MetricName
|
||||
var kb *bytesutil.ByteBuffer
|
||||
|
||||
idb := s.idb()
|
||||
dmis := idb.getDeletedMetricIDs()
|
||||
rowsLen := len(rows)
|
||||
@@ -908,9 +1044,10 @@ func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]ra
|
||||
prevTSID TSID
|
||||
prevMetricNameRaw []byte
|
||||
)
|
||||
var pmrs *pendingMetricRows
|
||||
minTimestamp, maxTimestamp := s.tb.getMinMaxTimestamps()
|
||||
// Return only the last error, since it has no sense in returning all errors.
|
||||
var lastWarn error
|
||||
// Return only the first error, since it has no sense in returning all errors.
|
||||
var firstWarn error
|
||||
for i := range mrs {
|
||||
mr := &mrs[i]
|
||||
if math.IsNaN(mr.Value) {
|
||||
@@ -920,13 +1057,19 @@ func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]ra
|
||||
}
|
||||
if mr.Timestamp < minTimestamp {
|
||||
// Skip rows with too small timestamps outside the retention.
|
||||
lastWarn = fmt.Errorf("cannot insert row with too small timestamp %d outside the retention; minimum allowed timestamp is %d", mr.Timestamp, minTimestamp)
|
||||
if firstWarn == nil {
|
||||
firstWarn = fmt.Errorf("cannot insert row with too small timestamp %d outside the retention; minimum allowed timestamp is %d",
|
||||
mr.Timestamp, minTimestamp)
|
||||
}
|
||||
atomic.AddUint64(&s.tooSmallTimestampRows, 1)
|
||||
continue
|
||||
}
|
||||
if mr.Timestamp > maxTimestamp {
|
||||
// Skip rows with too big timestamps significantly exceeding the current time.
|
||||
lastWarn = fmt.Errorf("cannot insert row with too big timestamp %d exceeding the current time; maximum allowd timestamp is %d", mr.Timestamp, maxTimestamp)
|
||||
if firstWarn == nil {
|
||||
firstWarn = fmt.Errorf("cannot insert row with too big timestamp %d exceeding the current time; maximum allowd timestamp is %d",
|
||||
mr.Timestamp, maxTimestamp)
|
||||
}
|
||||
atomic.AddUint64(&s.tooBigTimestampRows, 1)
|
||||
continue
|
||||
}
|
||||
@@ -941,76 +1084,170 @@ func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]ra
|
||||
r.TSID = prevTSID
|
||||
continue
|
||||
}
|
||||
if s.getTSIDFromCache(&r.TSID, mr.MetricNameRaw) {
|
||||
if !dmis.Has(r.TSID.MetricID) {
|
||||
if s.getTSIDFromCache(&r.TSID, mr.MetricNameRaw) && !dmis.Has(r.TSID.MetricID) {
|
||||
// Fast path - the TSID for the given MetricName has been found in cache and isn't deleted.
|
||||
prevTSID = r.TSID
|
||||
prevMetricNameRaw = mr.MetricNameRaw
|
||||
continue
|
||||
}
|
||||
|
||||
// Slow path - the TSID is missing in the cache.
|
||||
// Postpone its search in the loop below.
|
||||
j--
|
||||
if pmrs == nil {
|
||||
pmrs = getPendingMetricRows()
|
||||
}
|
||||
if err := pmrs.addRow(mr); err != nil {
|
||||
// Do not stop adding rows on error - just skip invalid row.
|
||||
// This guarantees that invalid rows don't prevent
|
||||
// from adding valid rows into the storage.
|
||||
if firstWarn == nil {
|
||||
firstWarn = err
|
||||
}
|
||||
continue
|
||||
}
|
||||
}
|
||||
if pmrs != nil {
|
||||
atomic.AddUint64(&s.slowRowInserts, uint64(len(pmrs.pmrs)))
|
||||
// Sort pendingMetricRows by canonical metric name in order to speed up search via `is` in the loop below.
|
||||
pendingMetricRows := pmrs.pmrs
|
||||
sort.Slice(pendingMetricRows, func(i, j int) bool {
|
||||
return string(pendingMetricRows[i].MetricName) < string(pendingMetricRows[j].MetricName)
|
||||
})
|
||||
is := idb.getIndexSearch()
|
||||
prevMetricNameRaw = nil
|
||||
for i := range pendingMetricRows {
|
||||
pmr := &pendingMetricRows[i]
|
||||
mr := &pmr.mr
|
||||
r := &rows[rowsLen+j]
|
||||
j++
|
||||
r.Timestamp = mr.Timestamp
|
||||
r.Value = mr.Value
|
||||
r.PrecisionBits = precisionBits
|
||||
if string(mr.MetricNameRaw) == string(prevMetricNameRaw) {
|
||||
// Fast path - the current mr contains the same metric name as the previous mr, so it contains the same TSID.
|
||||
// This path should trigger on bulk imports when many rows contain the same MetricNameRaw.
|
||||
r.TSID = prevTSID
|
||||
continue
|
||||
}
|
||||
if s.getTSIDFromCache(&r.TSID, mr.MetricNameRaw) && !dmis.Has(r.TSID.MetricID) {
|
||||
// Fast path - the TSID for the given MetricName has been found in cache and isn't deleted.
|
||||
prevTSID = r.TSID
|
||||
prevMetricNameRaw = mr.MetricNameRaw
|
||||
continue
|
||||
}
|
||||
if err := is.GetOrCreateTSIDByName(&r.TSID, pmr.MetricName); err != nil {
|
||||
// Do not stop adding rows on error - just skip invalid row.
|
||||
// This guarantees that invalid rows don't prevent
|
||||
// from adding valid rows into the storage.
|
||||
if firstWarn == nil {
|
||||
firstWarn = fmt.Errorf("cannot obtain or create TSID for MetricName %q: %s", pmr.MetricName, err)
|
||||
}
|
||||
j--
|
||||
continue
|
||||
}
|
||||
s.putTSIDToCache(&r.TSID, mr.MetricNameRaw)
|
||||
}
|
||||
|
||||
// Slow path - the TSID is missing in the cache. Search for it in the index.
|
||||
if is == nil {
|
||||
is = idb.getIndexSearch()
|
||||
mn = GetMetricName()
|
||||
kb = kbPool.Get()
|
||||
}
|
||||
if err := mn.unmarshalRaw(mr.MetricNameRaw); err != nil {
|
||||
// Do not stop adding rows on error - just skip invalid row.
|
||||
// This guarantees that invalid rows don't prevent
|
||||
// from adding valid rows into the storage.
|
||||
lastWarn = fmt.Errorf("cannot unmarshal MetricNameRaw %q: %s", mr.MetricNameRaw, err)
|
||||
j--
|
||||
continue
|
||||
}
|
||||
mn.sortTags()
|
||||
kb.B = mn.Marshal(kb.B[:0])
|
||||
if err := is.GetOrCreateTSIDByName(&r.TSID, kb.B); err != nil {
|
||||
// Do not stop adding rows on error - just skip invalid row.
|
||||
// This guarantees that invalid rows don't prevent
|
||||
// from adding valid rows into the storage.
|
||||
lastWarn = fmt.Errorf("cannot obtain TSID for MetricName %q: %s", kb.B, err)
|
||||
j--
|
||||
continue
|
||||
}
|
||||
s.putTSIDToCache(&r.TSID, mr.MetricNameRaw)
|
||||
}
|
||||
if lastWarn != nil {
|
||||
logger.Errorf("warn occurred during rows addition: %s", lastWarn)
|
||||
}
|
||||
if is != nil {
|
||||
kbPool.Put(kb)
|
||||
PutMetricName(mn)
|
||||
idb.putIndexSearch(is)
|
||||
putPendingMetricRows(pmrs)
|
||||
}
|
||||
if firstWarn != nil {
|
||||
logger.Errorf("warn occurred during rows addition: %s", firstWarn)
|
||||
}
|
||||
rows = rows[:rowsLen+j]
|
||||
|
||||
var lastError error
|
||||
var firstError error
|
||||
if err := s.tb.AddRows(rows); err != nil {
|
||||
lastError = fmt.Errorf("cannot add rows to table: %s", err)
|
||||
firstError = fmt.Errorf("cannot add rows to table: %s", err)
|
||||
}
|
||||
if err := s.updatePerDateData(rows, lastError); err != nil && lastError == nil {
|
||||
lastError = fmt.Errorf("cannot update per-date data: %s", err)
|
||||
if err := s.updatePerDateData(rows); err != nil && firstError == nil {
|
||||
firstError = fmt.Errorf("cannot update per-date data: %s", err)
|
||||
}
|
||||
if lastError != nil {
|
||||
return rows, fmt.Errorf("error occurred during rows addition: %s", lastError)
|
||||
if firstError != nil {
|
||||
return rows, fmt.Errorf("error occurred during rows addition: %s", firstError)
|
||||
}
|
||||
return rows, nil
|
||||
}
|
||||
|
||||
func (s *Storage) updatePerDateData(rows []rawRow, lastError error) error {
|
||||
type pendingMetricRow struct {
|
||||
MetricName []byte
|
||||
mr MetricRow
|
||||
}
|
||||
|
||||
type pendingMetricRows struct {
|
||||
pmrs []pendingMetricRow
|
||||
metricNamesBuf []byte
|
||||
|
||||
lastMetricNameRaw []byte
|
||||
lastMetricName []byte
|
||||
mn MetricName
|
||||
}
|
||||
|
||||
func (pmrs *pendingMetricRows) reset() {
|
||||
for _, pmr := range pmrs.pmrs {
|
||||
pmr.MetricName = nil
|
||||
pmr.mr.MetricNameRaw = nil
|
||||
}
|
||||
pmrs.pmrs = pmrs.pmrs[:0]
|
||||
pmrs.metricNamesBuf = pmrs.metricNamesBuf[:0]
|
||||
pmrs.lastMetricNameRaw = nil
|
||||
pmrs.lastMetricName = nil
|
||||
pmrs.mn.Reset()
|
||||
}
|
||||
|
||||
func (pmrs *pendingMetricRows) addRow(mr *MetricRow) error {
|
||||
// Do not spend CPU time on re-calculating canonical metricName during bulk import
|
||||
// of many rows for the same metric.
|
||||
if string(mr.MetricNameRaw) != string(pmrs.lastMetricNameRaw) {
|
||||
if err := pmrs.mn.unmarshalRaw(mr.MetricNameRaw); err != nil {
|
||||
return fmt.Errorf("cannot unmarshal MetricNameRaw %q: %s", mr.MetricNameRaw, err)
|
||||
}
|
||||
pmrs.mn.sortTags()
|
||||
metricNamesBufLen := len(pmrs.metricNamesBuf)
|
||||
pmrs.metricNamesBuf = pmrs.mn.Marshal(pmrs.metricNamesBuf)
|
||||
pmrs.lastMetricName = pmrs.metricNamesBuf[metricNamesBufLen:]
|
||||
pmrs.lastMetricNameRaw = mr.MetricNameRaw
|
||||
}
|
||||
pmrs.pmrs = append(pmrs.pmrs, pendingMetricRow{
|
||||
MetricName: pmrs.lastMetricName,
|
||||
mr: *mr,
|
||||
})
|
||||
return nil
|
||||
}
|
||||
|
||||
func getPendingMetricRows() *pendingMetricRows {
|
||||
v := pendingMetricRowsPool.Get()
|
||||
if v == nil {
|
||||
v = &pendingMetricRows{}
|
||||
}
|
||||
return v.(*pendingMetricRows)
|
||||
}
|
||||
|
||||
func putPendingMetricRows(pmrs *pendingMetricRows) {
|
||||
pmrs.reset()
|
||||
pendingMetricRowsPool.Put(pmrs)
|
||||
}
|
||||
|
||||
var pendingMetricRowsPool sync.Pool
|
||||
|
||||
func (s *Storage) updatePerDateData(rows []rawRow) error {
|
||||
var date uint64
|
||||
var hour uint64
|
||||
var prevTimestamp int64
|
||||
var (
|
||||
// These vars are used for speeding up bulk imports when multiple adjancent rows
|
||||
// contain the same (metricID, date) pairs.
|
||||
prevMatchedDate uint64
|
||||
prevMatchedMetricID uint64
|
||||
prevDate uint64
|
||||
prevMetricID uint64
|
||||
)
|
||||
idb := s.idb()
|
||||
hm := s.currHourMetricIDs.Load().(*hourMetricIDs)
|
||||
nextDayMetricIDs := &s.nextDayMetricIDs.Load().(*byDateMetricIDEntry).v
|
||||
todayShare16bit := uint64((float64(fasttime.UnixTimestamp()%(3600*24)) / (3600 * 24)) * (1 << 16))
|
||||
type pendingDateMetricID struct {
|
||||
date uint64
|
||||
metricID uint64
|
||||
}
|
||||
var pendingDateMetricIDs []pendingDateMetricID
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
if r.Timestamp != prevTimestamp {
|
||||
@@ -1024,6 +1261,21 @@ func (s *Storage) updatePerDateData(rows []rawRow, lastError error) error {
|
||||
if hm.m.Has(metricID) {
|
||||
// Fast path: the metricID is in the current hour cache.
|
||||
// This means the metricID has been already added to per-day inverted index.
|
||||
|
||||
// Gradually pre-populate per-day inverted index for the next day
|
||||
// during the current day.
|
||||
// This should reduce CPU usage spike and slowdown at the beginning of the next day
|
||||
// when entries for all the active time series must be added to the index.
|
||||
// This should address https://github.com/VictoriaMetrics/VictoriaMetrics/issues/430 .
|
||||
if todayShare16bit > (metricID&(1<<16-1)) && !nextDayMetricIDs.Has(metricID) {
|
||||
pendingDateMetricIDs = append(pendingDateMetricIDs, pendingDateMetricID{
|
||||
date: date + 1,
|
||||
metricID: metricID,
|
||||
})
|
||||
s.pendingNextDayMetricIDsLock.Lock()
|
||||
s.pendingNextDayMetricIDs.Add(metricID)
|
||||
s.pendingNextDayMetricIDsLock.Unlock()
|
||||
}
|
||||
continue
|
||||
}
|
||||
s.pendingHourEntriesLock.Lock()
|
||||
@@ -1032,29 +1284,80 @@ func (s *Storage) updatePerDateData(rows []rawRow, lastError error) error {
|
||||
}
|
||||
|
||||
// Slower path: check global cache for (date, metricID) entry.
|
||||
if metricID == prevMatchedMetricID && date == prevMatchedDate {
|
||||
if metricID == prevMetricID && date == prevDate {
|
||||
// Fast path for bulk import of multiple rows with the same (date, metricID) pairs.
|
||||
continue
|
||||
}
|
||||
prevDate = date
|
||||
prevMetricID = metricID
|
||||
|
||||
if !s.dateMetricIDCache.Has(date, metricID) {
|
||||
// Slow path: store the (date, metricID) entry in the indexDB.
|
||||
// It is OK if the (date, metricID) entry is added multiple times to db
|
||||
// by concurrent goroutines.
|
||||
pendingDateMetricIDs = append(pendingDateMetricIDs, pendingDateMetricID{
|
||||
date: date,
|
||||
metricID: metricID,
|
||||
})
|
||||
}
|
||||
}
|
||||
if len(pendingDateMetricIDs) == 0 {
|
||||
// Fast path - there are no new (date, metricID) entires in rows.
|
||||
return nil
|
||||
}
|
||||
|
||||
// Slow path - add new (date, metricID) entries to indexDB.
|
||||
|
||||
atomic.AddUint64(&s.slowPerDayIndexInserts, uint64(len(pendingDateMetricIDs)))
|
||||
// Sort pendingDateMetricIDs by (date, metricID) in order to speed up `is` search in the loop below.
|
||||
sort.Slice(pendingDateMetricIDs, func(i, j int) bool {
|
||||
a := pendingDateMetricIDs[i]
|
||||
b := pendingDateMetricIDs[j]
|
||||
if a.date != b.date {
|
||||
return a.date < b.date
|
||||
}
|
||||
return a.metricID < b.metricID
|
||||
})
|
||||
idb := s.idb()
|
||||
is := idb.getIndexSearch()
|
||||
defer idb.putIndexSearch(is)
|
||||
var firstError error
|
||||
prevMetricID = 0
|
||||
prevDate = 0
|
||||
for _, dateMetricID := range pendingDateMetricIDs {
|
||||
date := dateMetricID.date
|
||||
metricID := dateMetricID.metricID
|
||||
if metricID == prevMetricID && date == prevDate {
|
||||
// Fast path for bulk import of multiple rows with the same (date, metricID) pairs.
|
||||
continue
|
||||
}
|
||||
prevDate = date
|
||||
prevMetricID = metricID
|
||||
|
||||
if s.dateMetricIDCache.Has(date, metricID) {
|
||||
// The metricID has been already added to per-day inverted index.
|
||||
prevMatchedDate = date
|
||||
prevMatchedMetricID = metricID
|
||||
continue
|
||||
}
|
||||
|
||||
// Slow path: store the (date, metricID) entry in the indexDB.
|
||||
// It is OK if the (date, metricID) entry is added multiple times to db
|
||||
// by concurrent goroutines.
|
||||
if err := idb.storeDateMetricID(date, metricID); err != nil {
|
||||
lastError = err
|
||||
ok, err := is.hasDateMetricID(date, metricID)
|
||||
if err != nil {
|
||||
if firstError == nil {
|
||||
firstError = fmt.Errorf("error when locating (date=%d, metricID=%d) in database: %s", date, metricID, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if !ok {
|
||||
// The (date, metricID) entry is missing in the indexDB. Add it there.
|
||||
if err := is.storeDateMetricID(date, metricID); err != nil {
|
||||
if firstError == nil {
|
||||
firstError = fmt.Errorf("error when storing (date=%d, metricID=%d) in database: %s", date, metricID, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
}
|
||||
// The metric must be added to cache only after it has been successfully added to indexDB.
|
||||
s.dateMetricIDCache.Set(date, metricID)
|
||||
}
|
||||
return lastError
|
||||
return firstError
|
||||
}
|
||||
|
||||
// dateMetricIDCache is fast cache for holding (date, metricID) entries.
|
||||
@@ -1070,7 +1373,7 @@ type dateMetricIDCache struct {
|
||||
|
||||
// Contains mutable map protected by mu
|
||||
byDateMutable *byDateMetricIDMap
|
||||
lastSyncTime time.Time
|
||||
lastSyncTime uint64
|
||||
mu sync.Mutex
|
||||
}
|
||||
|
||||
@@ -1085,7 +1388,7 @@ func (dmc *dateMetricIDCache) Reset() {
|
||||
// Do not reset syncsCount and resetsCount
|
||||
dmc.byDate.Store(newByDateMetricIDMap())
|
||||
dmc.byDateMutable = newByDateMetricIDMap()
|
||||
dmc.lastSyncTime = time.Now()
|
||||
dmc.lastSyncTime = fasttime.UnixTimestamp()
|
||||
dmc.mu.Unlock()
|
||||
|
||||
atomic.AddUint64(&dmc.resetsCount, 1)
|
||||
@@ -1119,13 +1422,12 @@ func (dmc *dateMetricIDCache) Has(date, metricID uint64) bool {
|
||||
}
|
||||
|
||||
// Slow path. Check mutable map.
|
||||
currentTime := time.Now()
|
||||
|
||||
currentTime := fasttime.UnixTimestamp()
|
||||
dmc.mu.Lock()
|
||||
v = dmc.byDateMutable.get(date)
|
||||
ok := v.Has(metricID)
|
||||
mustSync := false
|
||||
if currentTime.Sub(dmc.lastSyncTime) > 10*time.Second {
|
||||
if currentTime-dmc.lastSyncTime > 10 {
|
||||
mustSync = true
|
||||
dmc.lastSyncTime = currentTime
|
||||
}
|
||||
@@ -1207,13 +1509,36 @@ type byDateMetricIDEntry struct {
|
||||
v uint64set.Set
|
||||
}
|
||||
|
||||
func (s *Storage) updateNextDayMetricIDs() {
|
||||
date := fasttime.UnixDate()
|
||||
e := s.nextDayMetricIDs.Load().(*byDateMetricIDEntry)
|
||||
s.pendingNextDayMetricIDsLock.Lock()
|
||||
pendingMetricIDs := s.pendingNextDayMetricIDs
|
||||
s.pendingNextDayMetricIDs = &uint64set.Set{}
|
||||
s.pendingNextDayMetricIDsLock.Unlock()
|
||||
if pendingMetricIDs.Len() == 0 && e.date == date {
|
||||
// Fast path: nothing to update.
|
||||
return
|
||||
}
|
||||
|
||||
// Slow path: union pendingMetricIDs with e.v
|
||||
if e.date == date {
|
||||
pendingMetricIDs.Union(&e.v)
|
||||
}
|
||||
eNew := &byDateMetricIDEntry{
|
||||
date: date,
|
||||
v: *pendingMetricIDs,
|
||||
}
|
||||
s.nextDayMetricIDs.Store(eNew)
|
||||
}
|
||||
|
||||
func (s *Storage) updateCurrHourMetricIDs() {
|
||||
hm := s.currHourMetricIDs.Load().(*hourMetricIDs)
|
||||
s.pendingHourEntriesLock.Lock()
|
||||
newMetricIDs := s.pendingHourEntries
|
||||
s.pendingHourEntries = &uint64set.Set{}
|
||||
s.pendingHourEntriesLock.Unlock()
|
||||
hour := uint64(timestampFromTime(time.Now())) / msecPerHour
|
||||
hour := fasttime.UnixHour()
|
||||
if newMetricIDs.Len() == 0 && hm.hour == hour {
|
||||
// Fast path: nothing to update.
|
||||
return
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user