mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-06-08 11:23:53 +03:00
Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dd1126dccf |
46
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
46
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
---
|
||||
name: Bug report
|
||||
about: Create a report to help us improve
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
---
|
||||
|
||||
**Describe the bug**
|
||||
A clear and concise description of what the bug is.
|
||||
It would be great to [upgrade](https://docs.victoriametrics.com/#how-to-upgrade)
|
||||
to [the latest available release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
||||
and verify whether the bug is reproducible there.
|
||||
It's also recommended to read the [troubleshooting docs](https://docs.victoriametrics.com/#troubleshooting).
|
||||
|
||||
**To Reproduce**
|
||||
Steps to reproduce the behavior.
|
||||
|
||||
**Expected behavior**
|
||||
A clear and concise description of what you expected to happen.
|
||||
|
||||
**Logs**
|
||||
Check if any warnings or errors were logged by VictoriaMetrics components
|
||||
or components in communication with VictoriaMetrics (e.g. Prometheus, Grafana).
|
||||
|
||||
**Screenshots**
|
||||
If applicable, add screenshots to help explain your problem.
|
||||
|
||||
For VictoriaMetrics health-state issues please provide full-length screenshots
|
||||
of Grafana dashboards if possible:
|
||||
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/dashboards/10229)
|
||||
* [Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176)
|
||||
|
||||
See how to setup monitoring here:
|
||||
* [monitoring for single-node VictoriaMetrics](https://docs.victoriametrics.com/#monitoring)
|
||||
* [monitoring for VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#monitoring)
|
||||
|
||||
**Version**
|
||||
The line returned when passing `--version` command line flag to the binary. For example:
|
||||
```
|
||||
$ ./victoria-metrics-prod --version
|
||||
victoria-metrics-20190730-121249-heads-single-node-0-g671d9e55
|
||||
```
|
||||
|
||||
**Used command-line flags**
|
||||
Please provide the command-line flags used for running VictoriaMetrics and its components.
|
||||
86
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
86
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
@@ -1,86 +0,0 @@
|
||||
name: Bug report
|
||||
description: Create a report to help us improve
|
||||
labels: [bug]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Before filling a bug report it would be great to [upgrade](https://docs.victoriametrics.com/#how-to-upgrade)
|
||||
to [the latest available release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
||||
and verify whether the bug is reproducible there.
|
||||
It's also recommended to read the [troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html) first.
|
||||
- type: textarea
|
||||
id: describe-the-bug
|
||||
attributes:
|
||||
label: Describe the bug
|
||||
description: |
|
||||
A clear and concise description of what the bug is.
|
||||
placeholder: |
|
||||
When I do `A` VictoriaMetrics does `B`. I expect it to do `C`.
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: to-reproduce
|
||||
attributes:
|
||||
label: To Reproduce
|
||||
description: |
|
||||
Steps to reproduce the behavior.
|
||||
If reproducing an issue requires some specific configuration file, please paste it here.
|
||||
placeholder: |
|
||||
Steps to reproduce the behavior.
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: version
|
||||
attributes:
|
||||
label: Version
|
||||
description: |
|
||||
The line returned when passing `--version` command line flag to the binary. For example:
|
||||
```
|
||||
$ ./victoria-metrics-prod --version
|
||||
victoria-metrics-20190730-121249-heads-single-node-0-g671d9e55
|
||||
```
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Logs
|
||||
description: |
|
||||
Check if any warnings or errors were logged by VictoriaMetrics components
|
||||
or components in communication with VictoriaMetrics (e.g. Prometheus, Grafana).
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: screenshots
|
||||
attributes:
|
||||
label: Screenshots
|
||||
description: |
|
||||
If applicable, add screenshots to help explain your problem.
|
||||
|
||||
For VictoriaMetrics health-state issues please provide full-length screenshots
|
||||
of Grafana dashboards if possible:
|
||||
* [Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
|
||||
* [Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/)
|
||||
|
||||
See how to setup monitoring here:
|
||||
* [monitoring for single-node VictoriaMetrics](https://docs.victoriametrics.com/#monitoring)
|
||||
* [monitoring for VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#monitoring)
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: flags
|
||||
attributes:
|
||||
label: Used command-line flags
|
||||
description: |
|
||||
Please provide the command-line flags used for running VictoriaMetrics and its components.
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: additional-info
|
||||
attributes:
|
||||
label: Additional information
|
||||
placeholder: |
|
||||
Additional information that doesn't fit elsewhere
|
||||
validations:
|
||||
required: false
|
||||
5
.github/ISSUE_TEMPLATE/configuration.yml
vendored
5
.github/ISSUE_TEMPLATE/configuration.yml
vendored
@@ -1,5 +0,0 @@
|
||||
blank_issues_enabled: true
|
||||
contact_links:
|
||||
- name: Ask on Slack
|
||||
url: https://slack.victoriametrics.com/
|
||||
about: You can ask for help here!
|
||||
20
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
20
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
---
|
||||
name: Feature request
|
||||
about: Suggest an idea for this project
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
**Is your feature request related to a problem? Please describe.**
|
||||
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
|
||||
|
||||
**Describe the solution you'd like**
|
||||
A clear and concise description of what you want to happen.
|
||||
|
||||
**Describe alternatives you've considered**
|
||||
A clear and concise description of any alternative solutions or features you've considered.
|
||||
|
||||
**Additional context**
|
||||
Add any other context or screenshots about the feature request here.
|
||||
43
.github/ISSUE_TEMPLATE/feature_request.yml
vendored
43
.github/ISSUE_TEMPLATE/feature_request.yml
vendored
@@ -1,43 +0,0 @@
|
||||
name: Feature request
|
||||
description: Suggest an idea for this project
|
||||
labels: [enhancement]
|
||||
body:
|
||||
- type: textarea
|
||||
id: describe-the-problem
|
||||
attributes:
|
||||
label: Is your feature request related to a problem? Please describe
|
||||
description: |
|
||||
A clear and concise description of what the problem is.
|
||||
placeholder: |
|
||||
Ex. I'm always frustrated when [...]
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: describe-the-solution
|
||||
attributes:
|
||||
label: Describe the solution you'd like
|
||||
description: |
|
||||
A clear and concise description of what you want to happen.
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: alternative-solutions
|
||||
attributes:
|
||||
label: Describe alternatives you've considered
|
||||
description: |
|
||||
A clear and concise description of any alternative solutions or features you've considered.
|
||||
placeholder: |
|
||||
I have tried to do `A`, but that doesn't solve a problem completely.
|
||||
I have tried to do `A` and `B`, but implementing this would be better.
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: feature-additional-info
|
||||
attributes:
|
||||
label: Additional information
|
||||
description: |
|
||||
Additional information which you consider helpful for implementing this feature.
|
||||
placeholder: |
|
||||
Add any other context or screenshots about the feature request here.
|
||||
validations:
|
||||
required: false
|
||||
2
.github/workflows/check-licenses.yml
vendored
2
.github/workflows/check-licenses.yml
vendored
@@ -17,7 +17,7 @@ jobs:
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@main
|
||||
with:
|
||||
go-version: 1.19.5
|
||||
go-version: 1.18
|
||||
id: go
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@master
|
||||
|
||||
42
.github/workflows/codeql-analysis-js.yml
vendored
42
.github/workflows/codeql-analysis-js.yml
vendored
@@ -1,42 +0,0 @@
|
||||
name: "CodeQL - JS"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [master, cluster]
|
||||
paths:
|
||||
- "**.js"
|
||||
pull_request:
|
||||
# The branches below must be a subset of the branches above
|
||||
branches: [master, cluster]
|
||||
paths:
|
||||
- "**.js"
|
||||
schedule:
|
||||
- cron: "30 18 * * 2"
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
actions: read
|
||||
contents: read
|
||||
security-events: write
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
language: ["javascript"]
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v2
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v2
|
||||
with:
|
||||
category: "javascript"
|
||||
74
.github/workflows/codeql-analysis.yml
vendored
74
.github/workflows/codeql-analysis.yml
vendored
@@ -13,20 +13,12 @@ name: "CodeQL"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [master, cluster]
|
||||
paths-ignore:
|
||||
- "**.md"
|
||||
- "**.txt"
|
||||
- "**.js"
|
||||
branches: [ master, cluster ]
|
||||
pull_request:
|
||||
# The branches below must be a subset of the branches above
|
||||
branches: [master, cluster]
|
||||
paths-ignore:
|
||||
- "**.md"
|
||||
- "**.txt"
|
||||
- "**.js"
|
||||
branches: [ master, cluster ]
|
||||
schedule:
|
||||
- cron: "30 18 * * 2"
|
||||
- cron: '30 18 * * 2'
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
@@ -40,47 +32,39 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
language: ["go"]
|
||||
language: [ 'go', 'javascript' ]
|
||||
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
|
||||
# Learn more about CodeQL language support at https://git.io/codeql-language-support
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: 1.19.5
|
||||
check-latest: true
|
||||
cache: true
|
||||
if: ${{ matrix.language == 'go' }}
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v2
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
# By default, queries listed here will override any specified in a config file.
|
||||
# Prefix the list here with "+" to use these queries and those in the config file.
|
||||
# queries: ./path/to/local/query, your-org/your-repo/queries@main
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v2
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
# By default, queries listed here will override any specified in a config file.
|
||||
# Prefix the list here with "+" to use these queries and those in the config file.
|
||||
# queries: ./path/to/local/query, your-org/your-repo/queries@main
|
||||
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
||||
# If this step fails, then you should remove it and run the build manually (see below)
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v2
|
||||
|
||||
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
||||
# If this step fails, then you should remove it and run the build manually (see below)
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v2
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 https://git.io/JvXDl
|
||||
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 https://git.io/JvXDl
|
||||
# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
|
||||
# and modify them (or add more) to build your code if your project
|
||||
# uses a compiled language
|
||||
|
||||
# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
|
||||
# and modify them (or add more) to build your code if your project
|
||||
# uses a compiled language
|
||||
#- run: |
|
||||
# make bootstrap
|
||||
# make release
|
||||
|
||||
#- run: |
|
||||
# make bootstrap
|
||||
# make release
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v2
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v2
|
||||
|
||||
66
.github/workflows/main-test.yml
vendored
66
.github/workflows/main-test.yml
vendored
@@ -1,66 +0,0 @@
|
||||
name: main - test
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- cluster
|
||||
paths-ignore:
|
||||
- "docs/**"
|
||||
- "**.md"
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
- cluster
|
||||
paths-ignore:
|
||||
- "docs/**"
|
||||
- "**.md"
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
name: lint
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: 1.19.5
|
||||
check-latest: true
|
||||
cache: true
|
||||
|
||||
- name: Dependencies
|
||||
run: |
|
||||
make install-golangci-lint
|
||||
make check-all
|
||||
git diff --exit-code
|
||||
|
||||
test:
|
||||
needs: lint
|
||||
strategy:
|
||||
matrix:
|
||||
scenario: ["test-full", "test-pure", "test-full-386"]
|
||||
name: test
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: 1.19.5
|
||||
check-latest: true
|
||||
cache: true
|
||||
|
||||
- name: run tests
|
||||
run: |
|
||||
make ${{ matrix.scenario}}
|
||||
|
||||
- name: Publish coverage
|
||||
uses: codecov/codecov-action@v3
|
||||
with:
|
||||
file: ./coverage.txt
|
||||
46
.github/workflows/main.yml
vendored
46
.github/workflows/main.yml
vendored
@@ -1,10 +1,13 @@
|
||||
name: main
|
||||
on:
|
||||
workflow_run:
|
||||
workflows: ["main - test"]
|
||||
types:
|
||||
- completed
|
||||
|
||||
push:
|
||||
paths-ignore:
|
||||
- 'docs/**'
|
||||
- '**.md'
|
||||
pull_request:
|
||||
paths-ignore:
|
||||
- 'docs/**'
|
||||
- '**.md'
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
@@ -13,17 +16,32 @@ jobs:
|
||||
name: Build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@v3
|
||||
uses: actions/setup-go@main
|
||||
with:
|
||||
go-version: 1.19.5
|
||||
check-latest: true
|
||||
cache: true
|
||||
|
||||
- name: Build
|
||||
go-version: 1.18
|
||||
id: go
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@master
|
||||
- name: Dependencies
|
||||
run: |
|
||||
make install-golint
|
||||
make install-errcheck
|
||||
make install-golangci-lint
|
||||
- name: Build
|
||||
env:
|
||||
GO111MODULE: on
|
||||
run: |
|
||||
export PATH=$PATH:$(go env GOPATH)/bin # temporary fix. See https://github.com/actions/setup-go/issues/14
|
||||
make check-all
|
||||
git diff --exit-code
|
||||
make test-full
|
||||
make test-pure
|
||||
make test-full-386
|
||||
make victoria-metrics-crossbuild
|
||||
make vmuitils-crossbuild
|
||||
- name: Publish coverage
|
||||
uses: codecov/codecov-action@v3
|
||||
with:
|
||||
file: ./coverage.txt
|
||||
|
||||
|
||||
39
.github/workflows/nightly-build.yml
vendored
39
.github/workflows/nightly-build.yml
vendored
@@ -1,39 +0,0 @@
|
||||
name: nightly-build
|
||||
on:
|
||||
schedule:
|
||||
# Daily at 2:48am
|
||||
- cron: '48 2 * * *'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
-
|
||||
name: Login to Docker Hub
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
-
|
||||
name: Setup Go
|
||||
uses: actions/setup-go@main
|
||||
with:
|
||||
go-version: 1.19.5
|
||||
id: go
|
||||
-
|
||||
name: Setup docker scan
|
||||
run: |
|
||||
mkdir -p ~/.docker/cli-plugins && \
|
||||
curl https://github.com/docker/scan-cli-plugin/releases/latest/download/docker-scan_linux_amd64 -L -s -S -o ~/.docker/cli-plugins/docker-scan &&\
|
||||
chmod +x ~/.docker/cli-plugins/docker-scan
|
||||
-
|
||||
name: Code checkout
|
||||
uses: actions/checkout@master
|
||||
-
|
||||
name: Publish
|
||||
run: |
|
||||
LATEST_TAG=nightly PKG_TAG=nightly make publish
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -19,5 +19,4 @@
|
||||
.DS_store
|
||||
Gemfile.lock
|
||||
/_site
|
||||
_site
|
||||
*.tmp
|
||||
_site
|
||||
@@ -1,15 +0,0 @@
|
||||
run:
|
||||
timeout: 2m
|
||||
|
||||
enable:
|
||||
- revive
|
||||
|
||||
issues:
|
||||
exclude-rules:
|
||||
- linters:
|
||||
- staticcheck
|
||||
text: "SA(4003|1019|5011):"
|
||||
|
||||
linters-settings:
|
||||
errcheck:
|
||||
exclude: ./errcheck_excludes.txt
|
||||
@@ -3,4 +3,3 @@ allowlist:
|
||||
- MIT
|
||||
- BSD-3-Clause
|
||||
- BSD-2-Clause
|
||||
- ISC
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -175,7 +175,7 @@
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright 2019-2023 VictoriaMetrics, Inc.
|
||||
Copyright 2019-2022 VictoriaMetrics, Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
||||
67
Makefile
67
Makefile
@@ -2,8 +2,7 @@ PKG_PREFIX := github.com/VictoriaMetrics/VictoriaMetrics
|
||||
|
||||
DATEINFO_TAG ?= $(shell date -u +'%Y%m%d-%H%M%S')
|
||||
BUILDINFO_TAG ?= $(shell echo $$(git describe --long --all | tr '/' '-')$$( \
|
||||
git diff-index --quiet HEAD -- || echo '-dirty-'$$(git diff-index -u HEAD | openssl sha1 | cut -d' ' -f2 | cut -c 1-8)))
|
||||
LATEST_TAG ?= latest
|
||||
git diff-index --quiet HEAD -- || echo '-dirty-'$$(git diff-index -u HEAD | openssl sha1 | cut -c 10-17)))
|
||||
|
||||
PKG_TAG ?= $(shell git tag -l --points-at HEAD)
|
||||
ifeq ($(PKG_TAG),)
|
||||
@@ -167,10 +166,13 @@ vmutils-crossbuild: \
|
||||
vmutils-windows-amd64
|
||||
|
||||
publish-release:
|
||||
git checkout $(TAG) && LATEST_TAG=stable $(MAKE) release publish && \
|
||||
git checkout $(TAG)-cluster && LATEST_TAG=cluster-stable $(MAKE) release publish && \
|
||||
git checkout $(TAG)-enterprise && LATEST_TAG=enterprise-stable $(MAKE) release publish && \
|
||||
git checkout $(TAG)-enterprise-cluster && LATEST_TAG=enterprise-cluster-stable $(MAKE) release publish
|
||||
git checkout $(TAG) && $(MAKE) release publish && \
|
||||
git checkout $(TAG)-cluster && $(MAKE) release publish && \
|
||||
git checkout $(TAG)-enterprise && $(MAKE) release publish && \
|
||||
git checkout $(TAG)-enterprise-cluster && $(MAKE) release publish && \
|
||||
git checkout $(TAG) && git push $(TAG) && git push $(TAG)-cluster && \
|
||||
$(MAKE) github-create-release && \
|
||||
$(MAKE) github-upload-assets
|
||||
|
||||
release: \
|
||||
release-victoria-metrics \
|
||||
@@ -315,7 +317,29 @@ vet:
|
||||
go vet ./lib/...
|
||||
go vet ./app/...
|
||||
|
||||
check-all: fmt vet golangci-lint govulncheck
|
||||
lint: install-golint
|
||||
golint lib/...
|
||||
golint app/...
|
||||
|
||||
install-golint:
|
||||
which golint || GO111MODULE=off go get golang.org/x/lint/golint
|
||||
|
||||
errcheck: install-errcheck
|
||||
errcheck -exclude=errcheck_excludes.txt ./lib/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vminsert/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmselect/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmstorage/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmagent/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmalert/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmauth/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmbackup/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmrestore/...
|
||||
errcheck -exclude=errcheck_excludes.txt ./app/vmctl/...
|
||||
|
||||
install-errcheck:
|
||||
which errcheck || GO111MODULE=off go get github.com/kisielk/errcheck
|
||||
|
||||
check-all: fmt vet lint errcheck golangci-lint
|
||||
|
||||
test:
|
||||
go test ./lib/... ./app/...
|
||||
@@ -343,7 +367,7 @@ benchmark-pure:
|
||||
vendor-update:
|
||||
go get -u -d ./lib/...
|
||||
go get -u -d ./app/...
|
||||
go mod tidy -compat=1.19
|
||||
go mod tidy -compat=1.18
|
||||
go mod vendor
|
||||
|
||||
app-local:
|
||||
@@ -362,43 +386,30 @@ quicktemplate-gen: install-qtc
|
||||
qtc
|
||||
|
||||
install-qtc:
|
||||
which qtc || go install github.com/valyala/quicktemplate/qtc@latest
|
||||
which qtc || GO111MODULE=off go get github.com/valyala/quicktemplate/qtc
|
||||
|
||||
|
||||
golangci-lint: install-golangci-lint
|
||||
golangci-lint run
|
||||
golangci-lint run --exclude '(SA4003|SA1019|SA5011):' -D errcheck -D structcheck --timeout 2m
|
||||
|
||||
install-golangci-lint:
|
||||
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.50.1
|
||||
|
||||
govulncheck: install-govulncheck
|
||||
govulncheck ./...
|
||||
|
||||
install-govulncheck:
|
||||
which govulncheck || go install golang.org/x/vuln/cmd/govulncheck@latest
|
||||
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.48.0
|
||||
|
||||
install-wwhrd:
|
||||
which wwhrd || go install github.com/frapposelli/wwhrd@latest
|
||||
which wwhrd || GO111MODULE=off go get github.com/frapposelli/wwhrd
|
||||
|
||||
check-licenses: install-wwhrd
|
||||
wwhrd check -f .wwhrd.yml
|
||||
|
||||
copy-docs:
|
||||
echo '' > ${DST}
|
||||
@if [ ${ORDER} -ne 0 ]; then \
|
||||
echo "---\nsort: ${ORDER}\n---\n" > ${DST}; \
|
||||
fi
|
||||
echo "---\nsort: ${ORDER}\n---\n" > ${DST}
|
||||
cat ${SRC} >> ${DST}
|
||||
sed -i='.tmp' 's/<img src=\"docs\//<img src=\"/' ${DST}
|
||||
rm -rf docs/*.tmp
|
||||
|
||||
# Copies docs for all components and adds the order tag.
|
||||
# For ORDER=0 it adds no order tag.
|
||||
# Images starting with <img src="docs/ are replaced with <img src="
|
||||
# Cluster docs are supposed to be ordered as 9th.
|
||||
# The rest of docs is ordered manually.
|
||||
# For The rest of docs is ordered manually.t
|
||||
docs-sync:
|
||||
SRC=README.md DST=docs/README.md ORDER=0 $(MAKE) copy-docs
|
||||
cp README.md docs/README.md
|
||||
SRC=README.md DST=docs/Single-server-VictoriaMetrics.md ORDER=1 $(MAKE) copy-docs
|
||||
SRC=app/vmagent/README.md DST=docs/vmagent.md ORDER=3 $(MAKE) copy-docs
|
||||
SRC=app/vmalert/README.md DST=docs/vmalert.md ORDER=4 $(MAKE) copy-docs
|
||||
|
||||
530
README.md
530
README.md
@@ -24,14 +24,12 @@ Learn more about [key concepts](https://docs.victoriametrics.com/keyConcepts.htm
|
||||
[quick start guide](https://docs.victoriametrics.com/Quick-Start.html) for a better experience.
|
||||
|
||||
[Contact us](mailto:info@victoriametrics.com) if you need enterprise support for VictoriaMetrics.
|
||||
See [features available in enterprise package](https://docs.victoriametrics.com/enterprise.html).
|
||||
See [features available in enterprise package](https://victoriametrics.com/products/enterprise/).
|
||||
Enterprise binaries can be downloaded and evaluated for free
|
||||
from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases).
|
||||
|
||||
VictoriaMetrics is developed at a fast pace, so it is recommended periodically checking the [CHANGELOG](https://docs.victoriametrics.com/CHANGELOG.html) and performing [regular upgrades](#how-to-upgrade-victoriametrics).
|
||||
|
||||
VictoriaMetrics has achieved security certifications for Database Software Development and Software-Based Monitoring Services. We apply strict security measures in everything we do. See our [Security page](https://victoriametrics.com/security/) for more details.
|
||||
|
||||
## Prominent features
|
||||
|
||||
VictoriaMetrics has the following prominent features:
|
||||
@@ -40,36 +38,19 @@ VictoriaMetrics has the following prominent features:
|
||||
* It can be used as a drop-in replacement for Prometheus in Grafana, because it supports [Prometheus querying API](#prometheus-querying-api-usage).
|
||||
* It can be used as a drop-in replacement for Graphite in Grafana, because it supports [Graphite API](#graphite-api-usage).
|
||||
* It features easy setup and operation:
|
||||
* VictoriaMetrics consists of a single [small executable](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d)
|
||||
without external dependencies.
|
||||
* VictoriaMetrics consists of a single [small executable](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d) without external dependencies.
|
||||
* All the configuration is done via explicit command-line flags with reasonable defaults.
|
||||
* All the data is stored in a single directory pointed by `-storageDataPath` command-line flag.
|
||||
* Easy and fast backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282)
|
||||
can be done with [vmbackup](https://docs.victoriametrics.com/vmbackup.html) / [vmrestore](https://docs.victoriametrics.com/vmrestore.html) tools.
|
||||
See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
|
||||
* It implements PromQL-like query language - [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html), which provides improved functionality on top of PromQL.
|
||||
* Easy and fast backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) to S3 or GCS can be done with [vmbackup](https://docs.victoriametrics.com/vmbackup.html) / [vmrestore](https://docs.victoriametrics.com/vmrestore.html) tools. See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
|
||||
* It implements PromQL-based query language - [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html), which provides improved functionality on top of PromQL.
|
||||
* It provides global query view. Multiple Prometheus instances or any other data sources may ingest data into VictoriaMetrics. Later this data may be queried via a single query.
|
||||
* It provides high performance and good vertical and horizontal scalability for both
|
||||
[data ingestion](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b)
|
||||
and [data querying](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4).
|
||||
It [outperforms InfluxDB and TimescaleDB by up to 20x](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
|
||||
* It [uses 10x less RAM than InfluxDB](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893)
|
||||
and [up to 7x less RAM than Prometheus, Thanos or Cortex](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f)
|
||||
when dealing with millions of unique time series (aka [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality)).
|
||||
* It provides high performance and good vertical and horizontal scalability for both [data ingestion](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b) and [data querying](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4). It [outperforms InfluxDB and TimescaleDB by up to 20x](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
|
||||
* It [uses 10x less RAM than InfluxDB](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893) and [up to 7x less RAM than Prometheus, Thanos or Cortex](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f) when dealing with millions of unique time series (aka [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality)).
|
||||
* It is optimized for time series with [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate).
|
||||
* It provides high data compression, so up to 70x more data points may be stored into limited storage comparing to TimescaleDB
|
||||
according to [these benchmarks](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4)
|
||||
and up to 7x less storage space is required compared to Prometheus, Thanos or Cortex
|
||||
according to [this benchmark](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f).
|
||||
* It is optimized for storage with high-latency IO and low IOPS (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc).
|
||||
See [disk IO graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b).
|
||||
* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, M3DB, Cortex, InfluxDB or TimescaleDB.
|
||||
See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae),
|
||||
[comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683)
|
||||
and [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) talk
|
||||
from [PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
|
||||
* It protects the storage from data corruption on unclean shutdown (i.e. OOM, hardware reset or `kill -9`) thanks to
|
||||
[the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
|
||||
* It provides high data compression, so [up to 70x more data points](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4) may be crammed into limited storage comparing to TimescaleDB and [up to 7x less storage space is required compared to Prometheus, Thanos or Cortex](https://valyala.medium.com/prometheus-vs-victoriametrics-benchmark-on-node-exporter-metrics-4ca29c75590f).
|
||||
* It is optimized for storage with high-latency IO and low IOPS (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc). See [disk IO graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b).
|
||||
* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, M3DB, Cortex, InfluxDB or TimescaleDB. See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae), [comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683) and [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) talk from [PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
|
||||
* It protects the storage from data corruption on unclean shutdown (i.e. OOM, hardware reset or `kill -9`) thanks to [the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
|
||||
* It supports metrics' scraping, ingestion and [backfilling](#backfilling) via the following protocols:
|
||||
* [Metrics scraping from Prometheus exporters](#how-to-scrape-prometheus-exporters-such-as-node-exporter).
|
||||
* [Prometheus remote write API](#prometheus-setup).
|
||||
@@ -82,15 +63,11 @@ VictoriaMetrics has the following prominent features:
|
||||
* [Arbitrary CSV data](#how-to-import-csv-data).
|
||||
* [Native binary format](#how-to-import-data-in-native-format).
|
||||
* [DataDog agent or DogStatsD](#how-to-send-data-from-datadog-agent).
|
||||
* It supports powerful [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html), which can be used as a [statsd](https://github.com/statsd/statsd) alternative.
|
||||
* It supports metrics [relabeling](#relabeling).
|
||||
* It can deal with [high cardinality issues](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) and
|
||||
[high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) issues via [series limiter](#cardinality-limiter).
|
||||
* It ideally works with big amounts of time series data from APM, Kubernetes, IoT sensors, connected cars, industrial telemetry, financial data
|
||||
and various [Enterprise workloads](https://docs.victoriametrics.com/enterprise.html).
|
||||
* It can deal with [high cardinality issues](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) and [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) issues via [series limiter](#cardinality-limiter).
|
||||
* It ideally works with big amounts of time series data from APM, Kubernetes, IoT sensors, connected cars, industrial telemetry, financial data and various [Enterprise workloads](https://victoriametrics.com/products/enterprise/).
|
||||
* It has open source [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
|
||||
* It can store data on [NFS-based storages](https://en.wikipedia.org/wiki/Network_File_System) such as [Amazon EFS](https://aws.amazon.com/efs/)
|
||||
and [Google Filestore](https://cloud.google.com/filestore).
|
||||
* It can store data on [NFS-based storages](https://en.wikipedia.org/wiki/Network_File_System) such as [Amazon EFS](https://aws.amazon.com/efs/) and [Google Filestore](https://cloud.google.com/filestore).
|
||||
|
||||
See also [various Articles about VictoriaMetrics](https://docs.victoriametrics.com/Articles.html).
|
||||
|
||||
@@ -105,6 +82,7 @@ Case studies:
|
||||
* [Brandwatch](https://docs.victoriametrics.com/CaseStudies.html#brandwatch)
|
||||
* [CERN](https://docs.victoriametrics.com/CaseStudies.html#cern)
|
||||
* [COLOPL](https://docs.victoriametrics.com/CaseStudies.html#colopl)
|
||||
* [Dreamteam](https://docs.victoriametrics.com/CaseStudies.html#dreamteam)
|
||||
* [Fly.io](https://docs.victoriametrics.com/CaseStudies.html#flyio)
|
||||
* [German Research Center for Artificial Intelligence](https://docs.victoriametrics.com/CaseStudies.html#german-research-center-for-artificial-intelligence)
|
||||
* [Grammarly](https://docs.victoriametrics.com/CaseStudies.html#grammarly)
|
||||
@@ -154,15 +132,7 @@ VictoriaMetrics is developed at a fast pace, so it is recommended periodically c
|
||||
|
||||
### Environment variables
|
||||
|
||||
All the VictoriaMetrics components allow referring environment variables in command-line flags via `%{ENV_VAR}` syntax.
|
||||
For example, `-metricsAuthKey=%{METRICS_AUTH_KEY}` is automatically expanded to `-metricsAuthKey=top-secret`
|
||||
if `METRICS_AUTH_KEY=top-secret` environment variable exists at VictoriaMetrics startup.
|
||||
This expansion is performed by VictoriaMetrics itself.
|
||||
|
||||
VictoriaMetrics recursively expands `%{ENV_VAR}` references in environment variables on startup.
|
||||
For example, `FOO=%{BAR}` environment variable is expanded to `FOO=abc` if `BAR=a%{BAZ}` and `BAZ=bc`.
|
||||
|
||||
Additionally, all the VictoriaMetrics components allow setting flag values via environment variables according to these rules:
|
||||
Each flag value can be set via environment variables according to these rules:
|
||||
|
||||
* The `-envflag.enable` flag must be set.
|
||||
* Each `.` char in flag name must be substituted with `_` (for example `-insert.maxQueueDuration <duration>` will translate to `insert_maxQueueDuration=<duration>`).
|
||||
@@ -290,14 +260,9 @@ Prometheus doesn't drop data during VictoriaMetrics restart. See [this article](
|
||||
|
||||
VictoriaMetrics provides UI for query troubleshooting and exploration. The UI is available at `http://victoriametrics:8428/vmui`.
|
||||
The UI allows exploring query results via graphs and tables.
|
||||
It also provides the following features:
|
||||
It also provides the ability to [explore cardinality](#cardinality-explorer) and to [investigate query traces](#query-tracing).
|
||||
|
||||
- [metrics explorer](#metrics-explorer)
|
||||
- [cardinality explorer](#cardinality-explorer)
|
||||
- [query tracer](#query-tracing)
|
||||
- [top queries explorer](#top-queries)
|
||||
|
||||
Graphs in `vmui` support scrolling and zooming:
|
||||
Graphs in vmui support scrolling and zooming:
|
||||
|
||||
* Select the needed time range on the graph in order to zoom in into the selected time range. Hold `ctrl` (or `cmd` on MacOS) and scroll down in order to zoom out.
|
||||
* Hold `ctrl` (or `cmd` on MacOS) and scroll up in order to zoom in the area under cursor.
|
||||
@@ -309,49 +274,22 @@ Multi-line queries can be entered by pressing `Shift-Enter` in query input field
|
||||
|
||||
When querying the [backfilled data](https://docs.victoriametrics.com/#backfilling) or during [query troubleshooting](https://docs.victoriametrics.com/Troubleshooting.html#unexpected-query-results), it may be useful disabling response cache by clicking `Disable cache` checkbox.
|
||||
|
||||
VMUI automatically adjusts the interval between datapoints on the graph depending on the horizontal resolution and on the selected time range. The step value can be customized by changing `Step value` input.
|
||||
VMUI automatically adjusts the interval between datapoints on the graph depending on the horizontal resolution and on the selected time range. The step value can be customized by clickhing `Override step value` checkbox.
|
||||
|
||||
VMUI allows investigating correlations between multiple queries on the same graph. Just click `Add Query` button,
|
||||
enter an additional query in the newly appeared input field and press `Enter`.
|
||||
Results for all the queries are displayed simultaneously on the same graph.
|
||||
Graphs for a particular query can be temporarily hidden by clicking the `eye` icon on the right side of the input field.
|
||||
When the `eye` icon is clicked while holding the `ctrl` key, then query results for the rest of queries become hidden
|
||||
except of the current query results.
|
||||
VMUI allows investigating correlations between two queries on the same graph. Just click `+` button, enter the second query in the newly appeared input field and press `Ctrl+Enter`. Results for both queries should be displayed simultaneously on the same graph. Every query has its own vertical scale, which is displayed on the left and the right side of the graph. Lines for the second query are dashed.
|
||||
|
||||
See the [example VMUI at VictoriaMetrics playground](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/prometheus/graph/?g0.expr=100%20*%20sum(rate(process_cpu_seconds_total))%20by%20(job)&g0.range_input=1d).
|
||||
|
||||
## Top queries
|
||||
|
||||
[VMUI](#vmui) provides `top queries` tab, which can help determining the following query types:
|
||||
|
||||
* the most frequently executed queries;
|
||||
* queries with the biggest average execution duration;
|
||||
* queries that took the most summary time for execution.
|
||||
|
||||
## Metrics explorer
|
||||
|
||||
[VMUI](#vmui) provides an ability to explore metrics exported by a particular `job` / `instance` in the following way:
|
||||
|
||||
1. Open the `vmui` at `http://victoriametrics:8428/vmui/`.
|
||||
2. Click the `Explore metrics` tab.
|
||||
3. Select the `job` you want to explore.
|
||||
4. Optionally select the `instance` for the selected job to explore.
|
||||
5. Select metrics you want to explore and compare.
|
||||
|
||||
It is possible to change the selected time range for the graphs in the top right corner.
|
||||
|
||||
## Cardinality explorer
|
||||
|
||||
VictoriaMetrics provides an ability to explore time series cardinality at `Explore cardinality` tab in [vmui](#vmui) in the following ways:
|
||||
VictoriaMetrics provides an ability to explore time series cardinality at `cardinality` tab in [vmui](#vmui) in the following ways:
|
||||
|
||||
- To identify metric names with the highest number of series.
|
||||
- To identify labels with the highest number of series.
|
||||
- To identify values with the highest number of series for the selected label (aka `focusLabel`).
|
||||
- To identify label=name pairs with the highest number of series.
|
||||
- To identify labels with the highest number of unique values.
|
||||
Note that [cluster version of VictoriaMetrics](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html)
|
||||
may show lower than expected number of unique label values for labels with small number of unique values.
|
||||
This is because of [implementation limits](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/5a6e617b5e41c9170e7c562aecd15ee0c901d489/app/vmselect/netstorage/netstorage.go#L1039-L1045).
|
||||
|
||||
By default cardinality explorer analyzes time series for the current date. It provides the ability to select different day at the top right corner.
|
||||
By default all the time series for the selected date are analyzed. It is possible to narrow down the analysis to series
|
||||
@@ -360,7 +298,7 @@ matching the specified [series selector](https://prometheus.io/docs/prometheus/l
|
||||
Cardinality explorer is built on top of [/api/v1/status/tsdb](#tsdb-stats).
|
||||
|
||||
See [cardinality explorer playground](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/prometheus/graph/#/cardinality).
|
||||
See the example of using the cardinality explorer [here](https://victoriametrics.com/blog/cardinality-explorer/).
|
||||
|
||||
|
||||
## How to apply new config to VictoriaMetrics
|
||||
|
||||
@@ -370,7 +308,7 @@ VictoriaMetrics is configured via command-line flags, so it must be restarted wh
|
||||
* Wait until the process stops. This can take a few seconds.
|
||||
* Start VictoriaMetrics with the new command-line flags.
|
||||
|
||||
Prometheus doesn't drop data during VictoriaMetrics restart. See [this article](https://grafana.com/blog/2019/03/25/whats-new-in-prometheus-2.8-wal-based-remote-write/) for details. The same applies also to [vmagent](https://docs.victoriametrics.com/vmagent.html).
|
||||
Prometheus doesn't drop data during VictoriaMetrics restart. See [this article](https://grafana.com/blog/2019/03/25/whats-new-in-prometheus-2.8-wal-based-remote-write/) for details. The same applies alos to [vmagent](https://docs.victoriametrics.com/vmagent.html).
|
||||
|
||||
## How to scrape Prometheus exporters such as [node-exporter](https://github.com/prometheus/node_exporter)
|
||||
|
||||
@@ -386,90 +324,51 @@ See also [vmagent](https://docs.victoriametrics.com/vmagent.html), which can be
|
||||
|
||||
## How to send data from DataDog agent
|
||||
|
||||
VictoriaMetrics accepts data from [DataDog agent](https://docs.datadoghq.com/agent/)
|
||||
or [DogStatsD](https://docs.datadoghq.com/developers/dogstatsd/)
|
||||
via ["submit metrics" API](https://docs.datadoghq.com/api/latest/metrics/#submit-metrics)
|
||||
at `/datadog/api/v1/series` path.
|
||||
VictoriaMetrics accepts data from [DataDog agent](https://docs.datadoghq.com/agent/) or [DogStatsD](https://docs.datadoghq.com/developers/dogstatsd/) via ["submit metrics" API](https://docs.datadoghq.com/api/latest/metrics/#submit-metrics) at `/datadog/api/v1/series` path.
|
||||
|
||||
### Sending metrics to VictoriaMetrics
|
||||
Run DataDog agent with `DD_DD_URL=http://victoriametrics-host:8428/datadog` environment variable in order to write data to VictoriaMetrics at `victoriametrics-host` host. Another option is to set `dd_url` param at [DataDog agent configuration file](https://docs.datadoghq.com/agent/guide/agent-configuration-files/) to `http://victoriametrics-host:8428/datadog`.
|
||||
|
||||
DataDog agent allows configuring destinations for metrics sending via ENV variable `DD_DD_URL`
|
||||
or via [configuration file](https://docs.datadoghq.com/agent/guide/agent-configuration-files/) in section `dd_url`.
|
||||
VictoriaMetrics doesn't check `DD_API_KEY` param, so it can be set to arbitrary value.
|
||||
|
||||
<p align="center">
|
||||
<img src="docs/Single-server-VictoriaMetrics-sending_DD_metrics_to_VM.png" width="800">
|
||||
</p>
|
||||
Example on how to send data to VictoriaMetrics via DataDog "submit metrics" API from command line:
|
||||
|
||||
```console
|
||||
echo '
|
||||
{
|
||||
"series": [
|
||||
{
|
||||
"host": "test.example.com",
|
||||
"interval": 20,
|
||||
"metric": "system.load.1",
|
||||
"points": [[
|
||||
0,
|
||||
0.5
|
||||
]],
|
||||
"tags": [
|
||||
"environment:test"
|
||||
],
|
||||
"type": "rate"
|
||||
}
|
||||
]
|
||||
}
|
||||
' | curl -X POST --data-binary @- http://localhost:8428/datadog/api/v1/series
|
||||
```
|
||||
|
||||
The imported data can be read via [export API](https://docs.victoriametrics.com/#how-to-export-data-in-json-line-format):
|
||||
|
||||
To configure DataDog agent via ENV variable add the following prefix:
|
||||
<div class="with-copy" markdown="1">
|
||||
|
||||
```
|
||||
DD_DD_URL=http://victoriametrics:8428/datadog
|
||||
```console
|
||||
curl http://localhost:8428/api/v1/export -d 'match[]=system.load.1'
|
||||
```
|
||||
|
||||
</div>
|
||||
|
||||
_Choose correct URL for VictoriaMetrics [here](https://docs.victoriametrics.com/url-examples.html#datadog)._
|
||||
|
||||
To configure DataDog agent via [configuration file](https://docs.datadoghq.com/agent/guide/agent-configuration-files)
|
||||
add the following line:
|
||||
|
||||
<div class="with-copy" markdown="1">
|
||||
This command should return the following output if everything is OK:
|
||||
|
||||
```json
|
||||
{"metric":{"__name__":"system.load.1","environment":"test","host":"test.example.com"},"values":[0.5],"timestamps":[1632833641000]}
|
||||
```
|
||||
dd_url: http://victoriametrics:8428/datadog
|
||||
```
|
||||
|
||||
</div>
|
||||
|
||||
vmagent also can accept Datadog metrics format. Depending on where vmagent will forward data,
|
||||
pick [single-node or cluster URL]((https://docs.victoriametrics.com/url-examples.html#datadog)) formats.
|
||||
|
||||
### Sending metrics to Datadog and VictoriaMetrics
|
||||
|
||||
DataDog allows configuring [Dual Shipping](https://docs.datadoghq.com/agent/guide/dual-shipping/) for metrics
|
||||
sending via ENV variable `DD_ADDITIONAL_ENDPOINTS` or via configuration file `additional_endpoints`.
|
||||
|
||||
<p align="center">
|
||||
<img src="docs/Single-server-VictoriaMetrics-sending_DD_metrics_to_VM_and_DD.png" width="800">
|
||||
</p>
|
||||
|
||||
Run DataDog using the following ENV variable with VictoriaMetrics as additional metrics receiver:
|
||||
|
||||
<div class="with-copy" markdown="1">
|
||||
|
||||
```
|
||||
DD_ADDITIONAL_ENDPOINTS='{\"http://victoriametrics:8428/datadog\"}'
|
||||
```
|
||||
|
||||
</div>
|
||||
|
||||
_Choose correct URL for VictoriaMetrics [here](https://docs.victoriametrics.com/url-examples.html#datadog)._
|
||||
|
||||
|
||||
To configure DataDog Dual Shipping via [configuration file](https://docs.datadoghq.com/agent/guide/agent-configuration-files)
|
||||
add the following line:
|
||||
|
||||
<div class="with-copy" markdown="1">
|
||||
|
||||
```
|
||||
additional_endpoints: http://victoriametrics:8428/datadog
|
||||
```
|
||||
|
||||
</div>
|
||||
|
||||
### Send via cURL
|
||||
|
||||
See how to send data to VictoriaMetrics via
|
||||
[DataDog "submit metrics"](https://docs.victoriametrics.com/url-examples.html#datadogapiv1series) from command line.
|
||||
|
||||
The imported data can be read via [export API](https://docs.victoriametrics.com/url-examples.html#apiv1export).
|
||||
|
||||
### Additional details
|
||||
|
||||
VictoriaMetrics automatically sanitizes metric names for the data ingested via DataDog protocol
|
||||
according to [DataDog metric naming recommendations](https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics).
|
||||
If you need accepting metric names as is without sanitizing, then pass `-datadog.sanitizeMetricName=false` command-line flag to VictoriaMetrics.
|
||||
|
||||
Extra labels may be added to all the written time series by passing `extra_label=name=value` query args.
|
||||
For example, `/datadog/api/v1/series?extra_label=foo=bar` would add `{foo="bar"}` label to all the ingested metrics.
|
||||
@@ -486,7 +385,7 @@ See [these docs](https://docs.victoriametrics.com/vmagent.html#adding-labels-to-
|
||||
|
||||
## How to send data from InfluxDB-compatible agents such as [Telegraf](https://www.influxdata.com/time-series-platform/telegraf/)
|
||||
|
||||
Use `http://<victoriametrics-addr>:8428` url instead of InfluxDB url in agents' configs.
|
||||
Use `http://<victoriametric-addr>:8428` url instead of InfluxDB url in agents' configs.
|
||||
For instance, put the following lines into `Telegraf` config, so it sends data to VictoriaMetrics instead of InfluxDB:
|
||||
|
||||
```toml
|
||||
@@ -504,9 +403,6 @@ VictoriaMetrics performs the following transformations to the ingested InfluxDB
|
||||
* Field names are mapped to time series names prefixed with `{measurement}{separator}` value, where `{separator}` equals to `_` by default. It can be changed with `-influxMeasurementFieldSeparator` command-line flag. See also `-influxSkipSingleField` command-line flag. If `{measurement}` is empty or if `-influxSkipMeasurement` command-line flag is set, then time series names correspond to field names.
|
||||
* Field values are mapped to time series values.
|
||||
* Tags are mapped to Prometheus labels as-is.
|
||||
* If `-usePromCompatibleNaming` command-line flag is set, then all the metric names and label names
|
||||
are normalized to [Prometheus-compatible naming](https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels) by replacing unsupported chars with `_`.
|
||||
For example, `foo.bar-baz/1` metric name or label name is substituted with `foo_bar_baz_1`.
|
||||
|
||||
For example, the following InfluxDB line:
|
||||
|
||||
@@ -735,13 +631,14 @@ VictoriaMetrics accepts optional `extra_label=<label_name>=<label_value>` query
|
||||
VictoriaMetrics accepts optional `extra_filters[]=series_selector` query arg, which can be used for enforcing arbitrary label filters for queries. For example,
|
||||
`/api/v1/query_range?extra_filters[]={env=~"prod|staging",user="xyz"}&query=<query>` would automatically add `{env=~"prod|staging",user="xyz"}` label filters to the given `<query>`. This functionality can be used for limiting the scope of time series visible to the given tenant. It is expected that the `extra_filters[]` query args are automatically set by auth proxy sitting in front of VictoriaMetrics. See [vmauth](https://docs.victoriametrics.com/vmauth.html) and [vmgateway](https://docs.victoriametrics.com/vmgateway.html) as examples of such proxies.
|
||||
|
||||
VictoriaMetrics accepts multiple formats for `time`, `start` and `end` query args - see [these docs](#timestamp-formats).
|
||||
VictoriaMetrics accepts relative times in `time`, `start` and `end` query args additionally to unix timestamps and [RFC3339](https://www.ietf.org/rfc/rfc3339.txt).
|
||||
For example, the following query would return data for the last 30 minutes: `/api/v1/query_range?start=-30m&query=...`.
|
||||
|
||||
VictoriaMetrics accepts `round_digits` query arg for `/api/v1/query` and `/api/v1/query_range` handlers. It can be used for rounding response values to the given number of digits after the decimal point. For example, `/api/v1/query?query=avg_over_time(temperature[1h])&round_digits=2` would round response values to up to two digits after the decimal point.
|
||||
|
||||
VictoriaMetrics accepts `limit` query arg for `/api/v1/labels` and `/api/v1/label/<labelName>/values` handlers for limiting the number of returned entries. For example, the query to `/api/v1/labels?limit=5` returns a sample of up to 5 unique labels, while ignoring the rest of labels. If the provided `limit` value exceeds the corresponding `-search.maxTagKeys` / `-search.maxTagValues` command-line flag values, then limits specified in the command-line flags are used.
|
||||
|
||||
By default, VictoriaMetrics returns time series for the last 5 minutes from `/api/v1/series`, `/api/v1/labels` and `/api/v1/label/<labelName>/values` while the Prometheus API defaults to all time. Explicitly set `start` and `end` to select the desired time range.
|
||||
By default, VictoriaMetrics returns time series for the last 5 minutes from `/api/v1/series`, while the Prometheus API defaults to all time. Use `start` and `end` to select a different time range.
|
||||
VictoriaMetrics accepts `limit` query arg for `/api/v1/series` handlers for limiting the number of returned entries. For example, the query to `/api/v1/series?limit=5` returns a sample of up to 5 series, while ignoring the rest. If the provided `limit` value exceeds the corresponding `-search.maxSeries` command-line flag values, then limits specified in the command-line flags are used.
|
||||
|
||||
Additionally, VictoriaMetrics provides the following handlers:
|
||||
@@ -760,18 +657,6 @@ Additionally, VictoriaMetrics provides the following handlers:
|
||||
For example, request to `/api/v1/status/top_queries?topN=5&maxLifetime=30s` would return up to 5 queries per list, which were executed during the last 30 seconds.
|
||||
VictoriaMetrics tracks the last `-search.queryStats.lastQueriesCount` queries with durations at least `-search.queryStats.minQueryDuration`.
|
||||
|
||||
### Timestamp formats
|
||||
|
||||
VictoriaMetrics accepts the following formats for `time`, `start` and `end` query args
|
||||
in [query APIs](https://docs.victoriametrics.com/#prometheus-querying-api-usage) and
|
||||
in [export APIs](https://docs.victoriametrics.com/#how-to-export-time-series).
|
||||
|
||||
- Unix timestamps in seconds with optional milliseconds after the point. For example, `1562529662.678`.
|
||||
- [RFC3339](https://www.ietf.org/rfc/rfc3339.txt). For example, '2022-03-29T01:02:03Z`.
|
||||
- Partial RFC3339. Examples: `2022`, `2022-03`, `2022-03-29`, `2022-03-29T01`, `2022-03-29T01:02`.
|
||||
- Relative duration comparing to the current time. For example, `1h5m` means `one hour and five minutes ago`.
|
||||
|
||||
|
||||
## Graphite API usage
|
||||
|
||||
VictoriaMetrics supports data ingestion in Graphite protocol - see [these docs](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd) for details.
|
||||
@@ -791,7 +676,7 @@ VictoriaMetrics supports `__graphite__` pseudo-label for filtering time series w
|
||||
|
||||
### Graphite Render API usage
|
||||
|
||||
[VictoriaMetrics Enterprise](https://docs.victoriametrics.com/enterprise.html) supports [Graphite Render API](https://graphite.readthedocs.io/en/stable/render_api.html) subset
|
||||
[VictoriaMetrics Enterprise](https://victoriametrics.com/products/enterprise/) supports [Graphite Render API](https://graphite.readthedocs.io/en/stable/render_api.html) subset
|
||||
at `/render` endpoint, which is used by [Graphite datasource in Grafana](https://grafana.com/docs/grafana/latest/datasources/graphite/).
|
||||
When configuring Graphite datasource in Grafana, the `Storage-Step` http request header must be set to a step between Graphite data points stored in VictoriaMetrics. For example, `Storage-Step: 10s` would mean 10 seconds distance between Graphite datapoints stored in VictoriaMetrics.
|
||||
Enterprise binaries can be downloaded and evaluated for free from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases).
|
||||
@@ -832,7 +717,7 @@ to your needs or when testing bugfixes.
|
||||
|
||||
### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.18.
|
||||
2. Run `make victoria-metrics` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `victoria-metrics` binary and puts it into the `bin` folder.
|
||||
|
||||
@@ -848,7 +733,7 @@ ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://b
|
||||
|
||||
### Development ARM build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.18.
|
||||
2. Run `make victoria-metrics-linux-arm` or `make victoria-metrics-linux-arm64` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `victoria-metrics-linux-arm` or `victoria-metrics-linux-arm64` binary respectively and puts it into the `bin` folder.
|
||||
|
||||
@@ -862,7 +747,7 @@ ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://b
|
||||
|
||||
`Pure Go` mode builds only Go code without [cgo](https://golang.org/cmd/cgo/) dependencies.
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.18.
|
||||
2. Run `make victoria-metrics-pure` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `victoria-metrics-pure` binary and puts it into the `bin` folder.
|
||||
|
||||
@@ -924,9 +809,8 @@ Steps for restoring from a snapshot:
|
||||
|
||||
Send a request to `http://<victoriametrics-addr>:8428/api/v1/admin/tsdb/delete_series?match[]=<timeseries_selector_for_delete>`,
|
||||
where `<timeseries_selector_for_delete>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
|
||||
for metrics to delete. Delete API doesn't support the deletion of specific time ranges, the series can only be deleted completely.
|
||||
Storage space for the deleted time series isn't freed instantly - it is freed during subsequent
|
||||
[background merges of data files](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
|
||||
for metrics to delete. After that all the time series matching the given selector are deleted. Storage space for
|
||||
the deleted time series isn't freed instantly - it is freed during subsequent [background merges of data files](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
|
||||
|
||||
Note that background merges may never occur for data from previous months, so storage space won't be freed for historical data.
|
||||
In this case [forced merge](#forced-merge) may help freeing up storage space.
|
||||
@@ -991,9 +875,8 @@ Each JSON line contains samples for a single time series. An example output:
|
||||
{"metric":{"__name__":"up","job":"prometheus","instance":"localhost:9090"},"values":[1,1,1],"timestamps":[1549891461511,1549891476511,1549891491511]}
|
||||
```
|
||||
|
||||
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data.
|
||||
See [allowed formats](#timestamp-formats) for these args.
|
||||
|
||||
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data. These args may contain either
|
||||
unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values.
|
||||
For example:
|
||||
```console
|
||||
curl http://<victoriametrics-addr>:8428/api/v1/export -d 'match[]=<timeseries_selector_for_export>' -d 'start=1654543486' -d 'end=1654543486'
|
||||
@@ -1040,9 +923,8 @@ where:
|
||||
* `<timeseries_selector_for_export>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
|
||||
for metrics to export.
|
||||
|
||||
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data.
|
||||
See [allowed formats](#timestamp-formats) for these args.
|
||||
|
||||
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data. These args may contain either
|
||||
unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values.
|
||||
For example:
|
||||
```console
|
||||
curl http://<victoriametrics-addr>:8428/api/v1/export/csv -d 'format=<format>' -d 'match[]=<timeseries_selector_for_export>' -d 'start=1654543486' -d 'end=1654543486'
|
||||
@@ -1068,9 +950,8 @@ wget -O- -q 'http://your_victoriametrics_instance:8428/api/v1/series/count' | jq
|
||||
# relaunch victoriametrics with search.maxExportSeries more than value from previous command
|
||||
```
|
||||
|
||||
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data.
|
||||
See [allowed formats](#timestamp-formats) for these args.
|
||||
|
||||
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data. These args may contain either
|
||||
unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values.
|
||||
For example:
|
||||
```console
|
||||
curl http://<victoriametrics-addr>:8428/api/v1/export/native -d 'match[]=<timeseries_selector_for_export>' -d 'start=1654543486' -d 'end=1654543486'
|
||||
@@ -1098,8 +979,7 @@ Time series data can be imported into VictoriaMetrics via any supported data ing
|
||||
* `/api/v1/import/native` for importing data obtained from [/api/v1/export/native](#how-to-export-data-in-native-format).
|
||||
See [these docs](#how-to-import-data-in-native-format) for details.
|
||||
* `/api/v1/import/csv` for importing arbitrary CSV data. See [these docs](#how-to-import-csv-data) for details.
|
||||
* `/api/v1/import/prometheus` for importing data in Prometheus exposition format and in [Pushgateway format](https://github.com/prometheus/pushgateway#url).
|
||||
See [these docs](#how-to-import-data-in-prometheus-exposition-format) for details.
|
||||
* `/api/v1/import/prometheus` for importing data in Prometheus exposition format. See [these docs](#how-to-import-data-in-prometheus-exposition-format) for details.
|
||||
|
||||
### How to import data in JSON line format
|
||||
|
||||
@@ -1204,11 +1084,9 @@ Note that it could be required to flush response cache after importing historica
|
||||
|
||||
### How to import data in Prometheus exposition format
|
||||
|
||||
VictoriaMetrics accepts data in [Prometheus exposition format](https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md#text-based-format),
|
||||
in [OpenMetrics format](https://github.com/OpenObservability/OpenMetrics/blob/master/specification/OpenMetrics.md)
|
||||
and in [Pushgateway format](https://github.com/prometheus/pushgateway#url) via `/api/v1/import/prometheus` path.
|
||||
|
||||
For example, the following command imports a single line in Prometheus exposition format into VictoriaMetrics:
|
||||
VictoriaMetrics accepts data in [Prometheus exposition format](https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md#text-based-format)
|
||||
and in [OpenMetrics format](https://github.com/OpenObservability/OpenMetrics/blob/master/specification/OpenMetrics.md)
|
||||
via `/api/v1/import/prometheus` path. For example, the following line imports a single line in Prometheus exposition format into VictoriaMetrics:
|
||||
|
||||
<div class="with-copy" markdown="1">
|
||||
|
||||
@@ -1234,16 +1112,6 @@ It should return something like the following:
|
||||
{"metric":{"__name__":"foo","bar":"baz"},"values":[123],"timestamps":[1594370496905]}
|
||||
```
|
||||
|
||||
The following command imports a single metric via [Pushgateway format](https://github.com/prometheus/pushgateway#url) with `{job="my_app",instance="host123"}` labels:
|
||||
|
||||
<div class="with-copy" markdown="1">
|
||||
|
||||
```console
|
||||
curl -d 'metric{label="abc"} 123' -X POST 'http://localhost:8428/api/v1/import/prometheus/metrics/job/my_app/instance/host123'
|
||||
```
|
||||
|
||||
</div>
|
||||
|
||||
Pass `Content-Encoding: gzip` HTTP request header to `/api/v1/import/prometheus` for importing gzipped data:
|
||||
|
||||
<div class="with-copy" markdown="1">
|
||||
@@ -1255,8 +1123,8 @@ curl -X POST -H 'Content-Encoding: gzip' http://destination-victoriametrics:8428
|
||||
|
||||
</div>
|
||||
|
||||
Extra labels may be added to all the imported metrics either via [Pushgateway format](https://github.com/prometheus/pushgateway#url)
|
||||
or by passing `extra_label=name=value` query args. For example, `/api/v1/import/prometheus?extra_label=foo=bar` would add `{foo="bar"}` label to all the imported metrics.
|
||||
Extra labels may be added to all the imported metrics by passing `extra_label=name=value` query args.
|
||||
For example, `/api/v1/import/prometheus?extra_label=foo=bar` would add `{foo="bar"}` label to all the imported metrics.
|
||||
|
||||
If timestamp is missing in `<metric> <value> <timestamp>` Prometheus exposition format line, then the current timestamp is used during data ingestion.
|
||||
It can be overridden by passing unix timestamp in *milliseconds* via `timestamp` query arg. For example, `/api/v1/import/prometheus?timestamp=1594370496905`.
|
||||
@@ -1293,11 +1161,7 @@ Example contents for `-relabelConfig` file:
|
||||
regex: true
|
||||
```
|
||||
|
||||
VictoriaMetrics provides additional relabeling features such as Graphite-style relabeling.
|
||||
See [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling) for more details.
|
||||
|
||||
The relabeling can be debugged at `http://victoriametrics:8428/metric-relabel-debug` page.
|
||||
See [these docs](https://docs.victoriametrics.com/vmagent.html#relabel-debug) for more details.
|
||||
VictoriaMetrics provides additional relabeling features such as Graphite-style relabeling. See [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling) for more details.
|
||||
|
||||
|
||||
## Federation
|
||||
@@ -1306,8 +1170,7 @@ VictoriaMetrics exports [Prometheus-compatible federation data](https://promethe
|
||||
at `http://<victoriametrics-addr>:8428/federate?match[]=<timeseries_selector_for_federation>`.
|
||||
|
||||
Optional `start` and `end` args may be added to the request in order to scrape the last point for each selected time series on the `[start ... end]` interval.
|
||||
See [allowed formats](#timestamp-formats) for these args.
|
||||
|
||||
`start` and `end` may contain either unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values.
|
||||
For example:
|
||||
```console
|
||||
curl http://<victoriametrics-addr>:8428/federate -d 'match[]=<timeseries_selector_for_export>' -d 'start=1654543486' -d 'end=1654543486'
|
||||
@@ -1345,11 +1208,10 @@ See also [resource usage limits docs](#resource-usage-limits).
|
||||
|
||||
By default VictoriaMetrics is tuned for an optimal resource usage under typical workloads. Some workloads may need fine-grained resource usage limits. In these cases the following command-line flags may be useful:
|
||||
|
||||
- `-memory.allowedPercent` and `-memory.allowedBytes` limit the amounts of memory, which may be used for various internal caches at VictoriaMetrics. Note that VictoriaMetrics may use more memory, since these flags don't limit additional memory, which may be needed on a per-query basis.
|
||||
- `-search.maxMemoryPerQuery` limits the amounts of memory, which can be used for processing a single query. Queries, which need more memory, are rejected. Heavy queries, which select big number of time series, may exceed the per-query memory limit by a small percent. The total memory limit for concurrently executed queries can be estimated as `-search.maxMemoryPerQuery` multiplied by `-search.maxConcurrentRequests`.
|
||||
- `-memory.allowedPercent` and `-search.allowedBytes` limit the amounts of memory, which may be used for various internal caches at VictoriaMetrics. Note that VictoriaMetrics may use more memory, since these flags don't limit additional memory, which may be needed on a per-query basis.
|
||||
- `-search.maxUniqueTimeseries` limits the number of unique time series a single query can find and process. VictoriaMetrics keeps in memory some metainformation about the time series located by each query and spends some CPU time for processing the found time series. This means that the maximum memory usage and CPU usage a single query can use is proportional to `-search.maxUniqueTimeseries`.
|
||||
- `-search.maxQueryDuration` limits the duration of a single query. If the query takes longer than the given duration, then it is canceled. This allows saving CPU and RAM when executing unexpected heavy queries.
|
||||
- `-search.maxConcurrentRequests` limits the number of concurrent requests VictoriaMetrics can process. Bigger number of concurrent requests usually means bigger memory usage. For example, if a single query needs 100 MiB of additional memory during its execution, then 100 concurrent queries may need `100 * 100 MiB = 10 GiB` of additional memory. So it is better to limit the number of concurrent queries, while suspending additional incoming queries if the concurrency limit is reached. VictoriaMetrics provides `-search.maxQueueDuration` command-line flag for limiting the max wait time for suspended queries. See also `-search.maxMemoryPerQuery` command-line flag.
|
||||
- `-search.maxConcurrentRequests` limits the number of concurrent requests VictoriaMetrics can process. Bigger number of concurrent requests usually means bigger memory usage. For example, if a single query needs 100 MiB of additional memory during its execution, then 100 concurrent queries may need `100 * 100 MiB = 10 GiB` of additional memory. So it is better to limit the number of concurrent queries, while suspending additional incoming queries if the concurrency limit is reached. VictoriaMetrics provides `-search.maxQueueDuration` command-line flag for limiting the max wait time for suspended queries.
|
||||
- `-search.maxSamplesPerSeries` limits the number of raw samples the query can process per each time series. VictoriaMetrics sequentially processes raw samples per each found time series during the query. It unpacks raw samples on the selected time range per each time series into memory and then applies the given [rollup function](https://docs.victoriametrics.com/MetricsQL.html#rollup-functions). The `-search.maxSamplesPerSeries` command-line flag allows limiting memory usage in the case when the query is executed on a time range, which contains hundreds of millions of raw samples per each located time series.
|
||||
- `-search.maxSamplesPerQuery` limits the number of raw samples a single query can process. This allows limiting CPU usage for heavy queries.
|
||||
- `-search.maxPointsPerTimeseries` limits the number of calculated points, which can be returned per each matching time series from [range query](https://docs.victoriametrics.com/keyConcepts.html#range-query).
|
||||
@@ -1404,12 +1266,7 @@ with the enabled de-duplication. See [this section](#deduplication) for details.
|
||||
|
||||
## Deduplication
|
||||
|
||||
VictoriaMetrics leaves a single raw sample with the biggest timestamp per each `-dedup.minScrapeInterval` discrete interval
|
||||
if `-dedup.minScrapeInterval` is set to positive duration. For example, `-dedup.minScrapeInterval=60s` would leave a single
|
||||
raw sample with the biggest timestamp per each discrete 60s interval.
|
||||
This aligns with the [staleness rules in Prometheus](https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness).
|
||||
|
||||
If multiple raw samples have the same biggest timestamp on the given `-dedup.minScrapeInterval` discrete interval, then the sample with the biggest value is left.
|
||||
VictoriaMetrics leaves a single raw sample with the biggest timestamp per each `-dedup.minScrapeInterval` discrete interval if `-dedup.minScrapeInterval` is set to positive duration. For example, `-dedup.minScrapeInterval=60s` would leave a single raw sample with the biggest timestamp per each discrete 60s interval. If multiple raw samples have the same biggest timestamp on the given `-dedup.minScrapeInterval` discrete interval, then an arbitrary sample out of these samples is left. This aligns with the [staleness rules in Prometheus](https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness).
|
||||
|
||||
The `-dedup.minScrapeInterval=D` is equivalent to `-downsampling.period=0s:D` if [downsampling](#downsampling) is enabled. So it is safe to use deduplication and downsampling simultaneously.
|
||||
|
||||
@@ -1423,50 +1280,18 @@ It is recommended passing different `-promscrape.cluster.name` values to HA pair
|
||||
|
||||
## Storage
|
||||
|
||||
VictoriaMetrics buffers the ingested data in memory for up to a second. Then the buffered data is written to in-memory `parts`,
|
||||
which can be searched during queries. The in-memory `parts` are periodically persisted to disk, so they could survive unclean shutdown
|
||||
such as out of memory crash, hardware power loss or `SIGKILL` signal. The interval for flushing the in-memory data to disk
|
||||
can be configured with the `-inmemoryDataFlushInterval` command-line flag (note that too short flush interval may significantly increase disk IO).
|
||||
VictoriaMetrics stores time series data in [MergeTree](https://en.wikipedia.org/wiki/Log-structured_merge-tree)-like
|
||||
data structures. On insert, VictoriaMetrics accumulates up to 1s of data and dumps it on disk to
|
||||
`<-storageDataPath>/data/small/YYYY_MM/` subdirectory forming a `part` with the following
|
||||
name pattern: `rowsCount_blocksCount_minTimestamp_maxTimestamp`. Each part consists of two "columns":
|
||||
values and timestamps. These are sorted and compressed raw time series values. Additionally, part contains
|
||||
index files for searching for specific series in the values and timestamps files.
|
||||
|
||||
In-memory parts are persisted to disk into `part` directories under the `<-storageDataPath>/data/small/YYYY_MM/` folder,
|
||||
where `YYYY_MM` is the month partition for the stored data. For example, `2022_11` is the partition for `parts`
|
||||
with [raw samples](https://docs.victoriametrics.com/keyConcepts.html#raw-samples) from `November 2022`.
|
||||
|
||||
The `part` directory has the following name pattern: `rowsCount_blocksCount_minTimestamp_maxTimestamp`, where:
|
||||
|
||||
- `rowsCount` - the number of [raw samples](https://docs.victoriametrics.com/keyConcepts.html#raw-samples) stored in the part
|
||||
- `blocksCount` - the number of blocks stored in the part (see details about blocks below)
|
||||
- `minTimestamp` and `maxTimestamp` - minimum and maximum timestamps across raw samples stored in the part
|
||||
|
||||
Each `part` consists of `blocks` sorted by internal time series id (aka `TSID`).
|
||||
Each `block` contains up to 8K [raw samples](https://docs.victoriametrics.com/keyConcepts.html#raw-samples),
|
||||
which belong to a single [time series](https://docs.victoriametrics.com/keyConcepts.html#time-series).
|
||||
Raw samples in each block are sorted by `timestamp`. Blocks for the same time series are sorted
|
||||
by the `timestamp` of the first sample. Timestamps and values for all the blocks
|
||||
are stored in [compressed form](https://faun.pub/victoriametrics-achieving-better-compression-for-time-series-data-than-gorilla-317bc1f95932)
|
||||
in separate files under `part` directory - `timestamps.bin` and `values.bin`.
|
||||
|
||||
The `part` directory also contains `index.bin` and `metaindex.bin` files - these files contain index
|
||||
for fast block lookups, which belong to the given `TSID` and cover the given time range.
|
||||
|
||||
`Parts` are periodically merged into bigger parts in background. The background merge provides the following benefits:
|
||||
|
||||
* keeping the number of data files under control, so they don't exceed limits on open files
|
||||
* improved data compression, since bigger parts are usually compressed better than smaller parts
|
||||
* improved query speed, since queries over smaller number of parts are executed faster
|
||||
* various background maintenance tasks such as [de-duplication](#deduplication), [downsampling](#downsampling)
|
||||
and [freeing up disk space for the deleted time series](#how-to-delete-time-series) are performed during the merge
|
||||
|
||||
Newly added `parts` either successfully appear in the storage or fail to appear.
|
||||
The newly added `parts` are being created in a temporary directory under `<-storageDataPath>/data/{small,big}/YYYY_MM/tmp` folder.
|
||||
When the newly added `part` is fully written and [fsynced](https://man7.org/linux/man-pages/man2/fsync.2.html)
|
||||
to a temporary directory, then it is atomically moved to the storage directory.
|
||||
Thanks to this alogrithm, storage never contains partially created parts, even if hardware power off
|
||||
occurrs in the middle of writing the `part` to disk - such incompletely written `parts`
|
||||
are automatically deleted on the next VictoriaMetrics start.
|
||||
|
||||
The same applies to merge process — `parts` are either fully merged into a new `part` or fail to merge,
|
||||
leaving the source `parts` untouched.
|
||||
`Parts` are periodically merged into the bigger parts. The resulting `part` is constructed
|
||||
under `<-storageDataPath>/data/{small,big}/YYYY_MM/tmp` subdirectory.
|
||||
When the resulting `part` is complete, it is atomically moved from the `tmp`
|
||||
to its own subdirectory, while the source parts are atomically removed. The end result is that the source
|
||||
parts are substituted by a single resulting bigger `part` in the `<-storageDataPath>/data/{small,big}/YYYY_MM/` directory.
|
||||
|
||||
VictoriaMetrics doesn't merge parts if their summary size exceeds free disk space.
|
||||
This prevents from potential out of disk space errors during merge.
|
||||
@@ -1475,10 +1300,24 @@ This increases overhead during data querying, since VictoriaMetrics needs to rea
|
||||
bigger number of parts per each request. That's why it is recommended to have at least 20%
|
||||
of free disk space under directory pointed by `-storageDataPath` command-line flag.
|
||||
|
||||
Information about merging process is available in [the dashboard for single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
|
||||
and [the dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/).
|
||||
Information about merging process is available in [single-node VictoriaMetrics](https://grafana.com/dashboards/10229)
|
||||
and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176) Grafana dashboards.
|
||||
See more details in [monitoring docs](#monitoring).
|
||||
|
||||
The `merge` process improves compression rate and keeps number of `parts` on disk relatively low.
|
||||
Benefits of doing the merge process are the following:
|
||||
|
||||
* it improves query performance, since lower number of `parts` are inspected with each query
|
||||
* it reduces the number of data files, since each `part` contains fixed number of files
|
||||
* various background maintenance tasks such as [de-duplication](#deduplication), [downsampling](#downsampling)
|
||||
and [freeing up disk space for the deleted time series](#how-to-delete-time-series) are performed during the merge.
|
||||
|
||||
Newly added `parts` either appear in the storage or fail to appear.
|
||||
Storage never contains partially created parts. The same applies to merge process — `parts` are either fully
|
||||
merged into a new `part` or fail to merge. MergeTree doesn't contain partially merged `parts`.
|
||||
`Part` contents in MergeTree never change. Parts are immutable. They may be only deleted after the merge
|
||||
to a bigger `part` or when the `part` contents goes outside the configured `-retentionPeriod`.
|
||||
|
||||
See [this article](https://valyala.medium.com/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) for more details.
|
||||
|
||||
See also [how to work with snapshots](#how-to-work-with-snapshots).
|
||||
@@ -1506,11 +1345,7 @@ VictoriaMetrics does not support indefinite retention, but you can specify an ar
|
||||
|
||||
## Multiple retentions
|
||||
|
||||
Distinct retentions for distinct time series can be configured via [retention filters](#retention-filters)
|
||||
in [VictoriaMetrics enterprise](https://docs.victoriametrics.com/enterprise.html).
|
||||
|
||||
Community version of VictoriaMetrics supports only a single retention, which can be configured via [-retentionPeriod](#retention) command-line flag.
|
||||
If you need multiple retentions in community version of VictoriaMetrics, then you may start multiple VictoriaMetrics instances with distinct values for the following flags:
|
||||
A single instance of VictoriaMetrics supports only a single retention, which can be configured via `-retentionPeriod` command-line flag. If you need multiple retentions, then you may start multiple VictoriaMetrics instances with distinct values for the following flags:
|
||||
|
||||
* `-retentionPeriod`
|
||||
* `-storageDataPath`, so the data for each retention period is saved in a separate directory
|
||||
@@ -1518,44 +1353,12 @@ If you need multiple retentions in community version of VictoriaMetrics, then yo
|
||||
|
||||
Then set up [vmauth](https://docs.victoriametrics.com/vmauth.html) in front of VictoriaMetrics instances,
|
||||
so it could route requests from particular user to VictoriaMetrics with the desired retention.
|
||||
|
||||
Similar scheme can be applied for multiple tenants in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html).
|
||||
The same scheme could be implemented for multiple tenants in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html).
|
||||
See [these docs](https://docs.victoriametrics.com/guides/guide-vmcluster-multiple-retention-setup.html) for multi-retention setup details.
|
||||
|
||||
## Retention filters
|
||||
|
||||
[Enterprise version of VictoriaMetrics](https://docs.victoriametrics.com/enterprise.html) supports e.g. `retention filters`,
|
||||
which allow configuring multiple retentions for distinct sets of time series matching the configured [series filters](https://docs.victoriametrics.com/keyConcepts.html#filtering)
|
||||
via `-retentionFilter` command-line flag. This flag accepts `filter:duration` options, where `filter` must be
|
||||
a valid [series filter](https://docs.victoriametrics.com/keyConcepts.html#filtering), while the `duration`
|
||||
must contain valid [retention](#retention) for time series matching the given `filter`. If series doesn't match
|
||||
any configured `-retentionFilter`, then the retention configured via [-retentionPeriod](#retention) command-line flag is applied to it.
|
||||
If series matches multiple configured retention filters, then the smallest retention is applied.
|
||||
|
||||
For example, the following config sets 3 days retention for time series with `team="juniors"` label,
|
||||
30 days retention for time series with `env="dev"` or `env="staging"` label and 1 year retention for the remaining time series:
|
||||
|
||||
```
|
||||
-retentionFilter='{team="juniors"}:3d' -retentionFilter='{env=~"dev|staging"}:30d' -retentionPeriod=1y
|
||||
```
|
||||
|
||||
Important notes:
|
||||
|
||||
- The data outside of the configured retention isn't deleted instantly - it is deleted eventually during [background merges](https://docs.victoriametrics.com/#storage).
|
||||
- The `-retentionFilter` doesn't remove old data from `indexdb` (aka inverted index) until the configured [-retentionPeriod](#retention).
|
||||
So the `indexdb` size can grow big under [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate)
|
||||
even for small retentions configured via `-retentionFilter`.
|
||||
|
||||
It is safe updating `-retentionFilter` during VictoriaMetrics restarts - the updated retention filters are applied eventually
|
||||
to historical data.
|
||||
|
||||
See [how to configure multiple retentions in VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#retention-filters).
|
||||
|
||||
Retention filters can be evaluated for free by downloading and using enterprise binaries from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases).
|
||||
|
||||
## Downsampling
|
||||
|
||||
[VictoriaMetrics Enterprise](https://docs.victoriametrics.com/enterprise.html) supports multi-level downsampling with `-downsampling.period` command-line flag. For example:
|
||||
[VictoriaMetrics Enterprise](https://victoriametrics.com/products/enterprise/) supports multi-level downsampling with `-downsampling.period` command-line flag. For example:
|
||||
|
||||
* `-downsampling.period=30d:5m` instructs VictoriaMetrics to [deduplicate](#deduplication) samples older than 30 days with 5 minutes interval.
|
||||
|
||||
@@ -1614,8 +1417,6 @@ VictoriaMetrics provides the following security-related command-line flags:
|
||||
Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats.
|
||||
For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<internal_iface_ip>:2003`. This protects from unexpected requests from untrusted network interfaces.
|
||||
|
||||
VictoriaMetrics has achieved security certifications for Database Software Development and Software-Based Monitoring Services. We apply strict security measures in everything we do. See our [Security page](https://victoriametrics.com/security/) for more details.
|
||||
|
||||
## Tuning
|
||||
|
||||
* No need in tuning for VictoriaMetrics - it uses reasonable defaults for command-line flags,
|
||||
@@ -1643,8 +1444,8 @@ Alternatively, single-node VictoriaMetrics can self-scrape the metrics when `-se
|
||||
set to duration greater than 0. For example, `-selfScrapeInterval=10s` would enable self-scraping of `/metrics` page
|
||||
with 10 seconds interval.
|
||||
|
||||
Official Grafana dashboards available for [single-node](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
|
||||
and [clustered](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/) VictoriaMetrics.
|
||||
Official Grafana dashboards available for [single-node](https://grafana.com/dashboards/10229)
|
||||
and [clustered](https://grafana.com/grafana/dashboards/11176) VictoriaMetrics.
|
||||
See an [alternative dashboard for clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11831)
|
||||
created by community.
|
||||
|
||||
@@ -1657,8 +1458,7 @@ VictoriaMetrics exposes currently running queries and their execution times at `
|
||||
|
||||
VictoriaMetrics exposes queries, which take the most time to execute, at `/api/v1/status/top_queries` page.
|
||||
|
||||
See also [VictoriaMetrics Monitoring](https://victoriametrics.com/blog/victoriametrics-monitoring/)
|
||||
and [troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html).
|
||||
See also [troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html).
|
||||
|
||||
## TSDB stats
|
||||
|
||||
@@ -1735,9 +1535,7 @@ All the durations and timestamps in traces are in milliseconds.
|
||||
|
||||
Query tracing is allowed by default. It can be denied by passing `-denyQueryTracing` command-line flag to VictoriaMetrics.
|
||||
|
||||
[VMUI](#vmui) provides an UI:
|
||||
- for query tracing - just click `Trace query` checkbox and re-run the query in order to investigate its' trace.
|
||||
- for exploring custom trace - go to the tab `Trace analyzer` and upload or paste JSON with trace information.
|
||||
[VMUI](#vmui) provides an UI for query tracing - just click `Trace query` checkbox and re-run the query in order to investigate its' trace.
|
||||
|
||||
|
||||
## Cardinality limiter
|
||||
@@ -1775,8 +1573,7 @@ The exceeded limits can be [monitored](#monitoring) with the following metrics:
|
||||
|
||||
These limits are approximate, so VictoriaMetrics can underflow/overflow the limit by a small percentage (usually less than 1%).
|
||||
|
||||
See also more advanced [cardinality limiter in vmagent](https://docs.victoriametrics.com/vmagent.html#cardinality-limiter)
|
||||
and [cardinality explorer docs](#cardinality-explorer).
|
||||
See also more advanced [cardinality limiter in vmagent](https://docs.victoriametrics.com/vmagent.html#cardinality-limiter).
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
@@ -1801,11 +1598,10 @@ and [cardinality explorer docs](#cardinality-explorer).
|
||||
|
||||
* VictoriaMetrics buffers incoming data in memory for up to a few seconds before flushing it to persistent storage.
|
||||
This may lead to the following "issues":
|
||||
* Data becomes available for querying in a few seconds after inserting. It is possible to flush in-memory buffers to searchable parts
|
||||
* Data becomes available for querying in a few seconds after inserting. It is possible to flush in-memory buffers to persistent storage
|
||||
by requesting `/internal/force_flush` http handler. This handler is mostly needed for testing and debugging purposes.
|
||||
* The last few seconds of inserted data may be lost on unclean shutdown (i.e. OOM, `kill -9` or hardware reset).
|
||||
The `-inmemoryDataFlushInterval` command-line flag allows controlling the frequency of in-memory data flush to persistent storage.
|
||||
See [storage docs](#storage) and [this article](https://valyala.medium.com/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704) for more details.
|
||||
See [this article for technical details](https://valyala.medium.com/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704).
|
||||
|
||||
* If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second,
|
||||
then it is likely you have too many [active time series](https://docs.victoriametrics.com/FAQ.html#what-is-an-active-time-series) for the current amount of RAM.
|
||||
@@ -1893,8 +1689,8 @@ The following metrics for each type of cache are exported at [`/metrics` page](#
|
||||
* `vm_cache_misses_total` - the number of cache misses
|
||||
* `vm_cache_entries` - the number of entries in the cache
|
||||
|
||||
Both Grafana dashboards for [single-node VictoriaMetrics](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
|
||||
and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/)
|
||||
Both Grafana dashboards for [single-node VictoriaMetrics](https://grafana.com/dashboards/10229)
|
||||
and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176)
|
||||
contain `Caches` section with cache metrics visualized. The panels show the current
|
||||
memory usage by each type of cache, and also a cache hit rate. If hit rate is close to 100%
|
||||
then cache efficiency is already very high and does not need any tuning.
|
||||
@@ -2127,17 +1923,13 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
```
|
||||
-bigMergeConcurrency int
|
||||
The maximum number of CPU cores to use for big merges. Default value is used if set to 0
|
||||
-cacheExpireDuration duration
|
||||
Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
|
||||
-configAuthKey string
|
||||
Authorization key for accessing /config page. It must be passed via authKey query arg
|
||||
-csvTrimTimestamp duration
|
||||
Trim timestamps when importing csv data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
|
||||
-datadog.maxInsertRequestSize size
|
||||
The maximum size in bytes of a single DataDog POST request to /api/v1/series
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-datadog.sanitizeMetricName
|
||||
Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864)
|
||||
-dedup.minScrapeInterval duration
|
||||
Leave only the last sample in every time series per each discrete interval equal to -dedup.minScrapeInterval > 0. See https://docs.victoriametrics.com/#deduplication and https://docs.victoriametrics.com/#downsampling
|
||||
-deleteAuthKey string
|
||||
@@ -2147,7 +1939,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-denyQueryTracing
|
||||
Whether to disable the ability to trace queries. See https://docs.victoriametrics.com/#query-tracing
|
||||
-downsampling.period array
|
||||
Comma-separated downsampling periods in the format 'offset:period'. For example, '30d:10m' instructs to leave a single sample per 10 minutes for samples older than 30 days. See https://docs.victoriametrics.com/#downsampling for details. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
Comma-separated downsampling periods in the format 'offset:period'. For example, '30d:10m' instructs to leave a single sample per 10 minutes for samples older than 30 days. See https://docs.victoriametrics.com/#downsampling for details
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-dryRun
|
||||
Whether to check only -promscrape.config and then exit. Unknown config entries aren't allowed in -promscrape.config by default. This can be changed with -promscrape.config.strictParse=false command-line flag
|
||||
@@ -2158,7 +1950,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-eula
|
||||
By specifying this flag, you confirm that you have an enterprise license and accept the EULA https://victoriametrics.com/assets/VM_EULA.pdf . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
By specifying this flag, you confirm that you have an enterprise license and accept the EULA https://victoriametrics.com/assets/VM_EULA.pdf
|
||||
-finalMergeDelay duration
|
||||
The delay before starting final merge for per-month partition after no new data is ingested into it. Final merge may require additional disk IO and CPU resources. Final merge may increase query speed and reduce disk space usage in some cases. Zero value disables final merge
|
||||
-flagsAuthKey string
|
||||
@@ -2193,13 +1985,13 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
TCP address to listen for http connections (default ":8428")
|
||||
-import.maxLineLen size
|
||||
The maximum length in bytes of a single line accepted by /api/v1/import; the line length can be limited with 'max_rows_per_line' query arg passed to /api/v1/export
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 104857600)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 104857600)
|
||||
-influx.databaseNames array
|
||||
Comma-separated list of database names to return from /query and /influx/query API. This can be needed for accepting data from Telegraf plugins such as https://github.com/fangli/fluent-plugin-influxdb
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-influx.maxLineSize size
|
||||
The maximum size in bytes for a single InfluxDB line during parsing
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 262144)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 262144)
|
||||
-influxDBLabel string
|
||||
Default label for the DB name sent over '?db={db_name}' query parameter (default "db")
|
||||
-influxListenAddr string
|
||||
@@ -2212,8 +2004,6 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Uses '{measurement}' instead of '{measurement}{separator}{field_name}' for metic name if InfluxDB line contains only a single field
|
||||
-influxTrimTimestamp duration
|
||||
Trim timestamps for InfluxDB line protocol data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
|
||||
-inmemoryDataFlushInterval duration
|
||||
The interval for guaranteed saving of in-memory data to disk. The saved data survives unclean shutdown such as OOM crash, hardware reset, SIGKILL, etc. Bigger intervals may help increasing lifetime of flash storage with limited write cycles (e.g. Raspberry PI). Smaller intervals increase disk IO load. Minimum supported value is 1s (default 5s)
|
||||
-insert.maxQueueDuration duration
|
||||
The maximum duration for waiting in the queue for insert requests due to -maxConcurrentInserts (default 1m0s)
|
||||
-logNewSeries
|
||||
@@ -2224,8 +2014,6 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
@@ -2235,17 +2023,17 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
|
||||
-maxConcurrentInserts int
|
||||
The maximum number of concurrent insert requests. Default value should work for most cases, since it minimizes the memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration (default 8)
|
||||
The maximum number of concurrent inserts. Default value should work for most cases, since it minimizes the overhead for concurrent inserts. This option is tigthly coupled with -insert.maxQueueDuration (default 16)
|
||||
-maxInsertRequestSize size
|
||||
The maximum size in bytes of a single Prometheus remote_write API request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 33554432)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 33554432)
|
||||
-maxLabelValueLen int
|
||||
The maximum length of label values in the accepted time series. Longer label values are truncated. In this case the vm_too_long_label_values_total metric at /metrics page is incremented (default 16384)
|
||||
-maxLabelsPerTimeseries int
|
||||
The maximum number of labels accepted per time series. Superfluous labels are dropped. In this case the vm_metrics_with_dropped_labels_total metric at /metrics page is incremented (default 30)
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache resulting in higher disk IO usage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
@@ -2258,15 +2046,13 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Trim timestamps for OpenTSDB 'telnet put' data to this duration. Minimum practical duration is 1s. Higher duration (i.e. 1m) may be used for reducing disk space usage for timestamp data (default 1s)
|
||||
-opentsdbhttp.maxInsertRequestSize size
|
||||
The maximum size of OpenTSDB HTTP put request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 33554432)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 33554432)
|
||||
-opentsdbhttpTrimTimestamp duration
|
||||
Trim timestamps for OpenTSDB HTTP data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
|
||||
-pprofAuthKey string
|
||||
Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-precisionBits int
|
||||
The number of precision bits to store per each value. Lower precision bits improves data compression at the cost of precision loss (default 64)
|
||||
-prevCacheRemovalPercent float
|
||||
Items in the previous caches are removed when the percent of requests it serves becomes lower than this value. Higher values reduce memory usage at the cost of higher CPU usage. See also -cacheExpireDuration (default 0.1)
|
||||
-promscrape.azureSDCheckInterval duration
|
||||
Interval for checking for changes in Azure. This works only if azure_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#azure_sd_configs for details (default 1m0s)
|
||||
-promscrape.cluster.memberNum string
|
||||
@@ -2325,17 +2111,13 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
The maximum number of droppedTargets to show at /api/v1/targets page. Increase this value if your setup drops more scrape targets during relabeling and you need investigating labels for all the dropped targets. Note that the increased number of tracked dropped targets may result in increased memory usage (default 1000)
|
||||
-promscrape.maxResponseHeadersSize size
|
||||
The maximum size of http response headers from Prometheus scrape targets
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 4096)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 4096)
|
||||
-promscrape.maxScrapeSize size
|
||||
The maximum size of scrape response in bytes to process from Prometheus targets. Bigger responses are rejected
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 16777216)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 16777216)
|
||||
-promscrape.minResponseSizeForStreamParse size
|
||||
The minimum target response size for automatic switching to stream parsing mode, which can reduce memory usage. See https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 1000000)
|
||||
-promscrape.nomad.waitTime duration
|
||||
Wait time used by Nomad service discovery. Default value is used if not set
|
||||
-promscrape.nomadSDCheckInterval duration
|
||||
Interval for checking for changes in Nomad. This works only if nomad_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#nomad_sd_configs for details (default 30s)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 1000000)
|
||||
-promscrape.noStaleMarkers
|
||||
Whether to disable sending Prometheus stale markers for metrics when scrape target disappears. This option may reduce memory usage if stale markers aren't needed for your setup. This option also disables populating the scrape_series_added metric. See https://prometheus.io/docs/concepts/jobs_instances/#automatically-generated-labels-and-time-series
|
||||
-promscrape.openstackSDCheckInterval duration
|
||||
@@ -2351,7 +2133,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-promscrape.suppressScrapeErrorsDelay duration
|
||||
The delay for suppressing repeated scrape errors logging per each scrape targets. This may be used for reducing the number of log lines related to scrape errors. See also -promscrape.suppressScrapeErrors
|
||||
-promscrape.yandexcloudSDCheckInterval duration
|
||||
Interval for checking for changes in Yandex Cloud API. This works only if yandexcloud_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#yandexcloud_sd_configs for details (default 30s)
|
||||
Interval for checking for changes in Yandex Cloud API. This works only if yandexcloud_sd_configs is configured in '-promscrape.config' file. (default 30s)
|
||||
-pushmetrics.extraLabel array
|
||||
Optional labels to add to metrics pushed to -pushmetrics.url . For example, -pushmetrics.extraLabel='instance="foo"' adds instance="foo" label to all the metrics pushed to -pushmetrics.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
@@ -2362,11 +2144,10 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-relabelConfig string
|
||||
Optional path to a file with relabeling rules, which are applied to all the ingested metrics. The path can point either to local file or to http url. See https://docs.victoriametrics.com/#relabeling for details. The config is reloaded on SIGHUP signal
|
||||
-retentionFilter array
|
||||
Retention filter in the format 'filter:retention'. For example, '{env="dev"}:3d' configures the retention for time series with env="dev" label to 3 days. See https://docs.victoriametrics.com/#retention-filters for details. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-relabelDebug
|
||||
Whether to log metrics before and after relabeling with -relabelConfig. If the -relabelDebug is enabled, then the metrics aren't sent to storage. This is useful for debugging the relabeling configs
|
||||
-retentionPeriod value
|
||||
Data with timestamps outside the retentionPeriod is automatically deleted. See also -retentionFilter
|
||||
Data with timestamps outside the retentionPeriod is automatically deleted
|
||||
The following optional suffixes are supported: h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 1)
|
||||
-retentionTimezoneOffset duration
|
||||
The offset for performing indexdb rotation. If set to 0, then the indexdb rotation is performed at 4am UTC time per each -retentionPeriod. If set to 2h, then the indexdb rotation is performed at 4am EET time (the timezone with +2h offset)
|
||||
@@ -2377,15 +2158,15 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-search.disableCache
|
||||
Whether to disable response caching. This may be useful during data backfilling
|
||||
-search.graphiteMaxPointsPerSeries int
|
||||
The maximum number of points per series Graphite render API can return. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html (default 1000000)
|
||||
The maximum number of points per series Graphite render API can return (default 1000000)
|
||||
-search.graphiteStorageStep duration
|
||||
The interval between datapoints stored in the database. It is used at Graphite Render API handler for normalizing the interval between datapoints in case it isn't normalized. It can be overridden by sending 'storage_step' query arg to /render API or by sending the desired interval via 'Storage-Step' http header during querying /render API. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html (default 10s)
|
||||
The interval between datapoints stored in the database. It is used at Graphite Render API handler for normalizing the interval between datapoints in case it isn't normalized. It can be overridden by sending 'storage_step' query arg to /render API or by sending the desired interval via 'Storage-Step' http header during querying /render API (default 10s)
|
||||
-search.latencyOffset duration
|
||||
The time when data points become visible in query results after the collection. It can be overridden on per-query basis via latency_offset arg. Too small value can result in incomplete last points for query results (default 30s)
|
||||
The time when data points become visible in query results after the collection. Too small value can result in incomplete last points for query results (default 30s)
|
||||
-search.logSlowQueryDuration duration
|
||||
Log queries with execution time exceeding this value. Zero disables slow query logging (default 5s)
|
||||
-search.maxConcurrentRequests int
|
||||
The maximum number of concurrent search requests. It shouldn't be high, since a single request can saturate all the CPU cores, while many concurrently executed requests may require high amounts of memory. See also -search.maxQueueDuration and -search.maxMemoryPerQuery (default 8)
|
||||
The maximum number of concurrent search requests. It shouldn't be high, since a single request can saturate all the CPU cores. See also -search.maxQueueDuration (default 8)
|
||||
-search.maxExportDuration duration
|
||||
The maximum duration for /api/v1/export call (default 720h0m0s)
|
||||
-search.maxExportSeries int
|
||||
@@ -2393,12 +2174,9 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
-search.maxFederateSeries int
|
||||
The maximum number of time series, which can be returned from /federate. This option allows limiting memory usage (default 1000000)
|
||||
-search.maxGraphiteSeries int
|
||||
The maximum number of time series, which can be scanned during queries to Graphite Render API. See https://docs.victoriametrics.com/#graphite-render-api-usage . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html (default 300000)
|
||||
The maximum number of time series, which can be scanned during queries to Graphite Render API. See https://docs.victoriametrics.com/#graphite-render-api-usage (default 300000)
|
||||
-search.maxLookback duration
|
||||
Synonym to -search.lookback-delta from Prometheus. The value is dynamically detected from interval between time series datapoints if not set. It can be overridden on per-query basis via max_lookback arg. See also '-search.maxStalenessInterval' flag, which has the same meaining due to historical reasons
|
||||
-search.maxMemoryPerQuery size
|
||||
The maximum amounts of memory a single query may consume. Queries requiring more memory are rejected. The total memory limit for concurrently executed queries can be estimated as -search.maxMemoryPerQuery multiplied by -search.maxConcurrentRequests
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-search.maxPointsPerTimeseries int
|
||||
The maximum points per a single timeseries returned from /api/v1/query_range. This option doesn't limit the number of scanned raw samples in the database. The main purpose of this option is to limit the number of per-series points returned to graphing UI such as VMUI or Grafana. There is no sense in setting this limit to values bigger than the horizontal resolution of the graph (default 30000)
|
||||
-search.maxPointsSubqueryPerTimeseries int
|
||||
@@ -2407,7 +2185,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
The maximum duration for query execution (default 30s)
|
||||
-search.maxQueryLen size
|
||||
The maximum search query length in bytes
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 16384)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 16384)
|
||||
-search.maxQueueDuration duration
|
||||
The maximum time the request waits for execution when -search.maxConcurrentRequests limit is reached; see also -search.maxQueryDuration (default 10s)
|
||||
-search.maxSamplesPerQuery int
|
||||
@@ -2463,29 +2241,25 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Whether to sort labels for incoming samples before writing them to storage. This may be needed for reducing memory usage at storage when the order of labels in incoming samples is random. For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}. Enabled sorting for labels can slow down ingestion performance a bit
|
||||
-storage.cacheSizeIndexDBDataBlocks size
|
||||
Overrides max size for indexdb/dataBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-storage.cacheSizeIndexDBIndexBlocks size
|
||||
Overrides max size for indexdb/indexBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-storage.cacheSizeIndexDBTagFilters size
|
||||
Overrides max size for indexdb/tagFiltersToMetricIDs cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
Overrides max size for indexdb/tagFilters cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-storage.cacheSizeStorageTSID size
|
||||
Overrides max size for storage/tsid cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-storage.maxDailySeries int
|
||||
The maximum number of unique series can be added to the storage during the last 24 hours. Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/#cardinality-limiter . See also -storage.maxHourlySeries
|
||||
-storage.maxHourlySeries int
|
||||
The maximum number of unique series can be added to the storage during the last hour. Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/#cardinality-limiter . See also -storage.maxDailySeries
|
||||
-storage.minFreeDiskSpaceBytes size
|
||||
The minimum free disk space at -storageDataPath after which the storage stops accepting new data
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 10000000)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 10000000)
|
||||
-storageDataPath string
|
||||
Path to storage data (default "victoria-metrics-data")
|
||||
-streamAggr.config string
|
||||
Optional path to file with stream aggregation config. See https://docs.victoriametrics.com/stream-aggregation.html .See also -remoteWrite.streamAggr.keepInput
|
||||
-streamAggr.keepInput
|
||||
Whether to keep input samples after the aggregation with -streamAggr.config .By default the input is dropped after the aggregation, so only the aggregate data is stored. See https://docs.victoriametrics.com/stream-aggregation.html
|
||||
-tls
|
||||
Whether to enable TLS for incoming HTTP requests at -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set
|
||||
-tlsCertFile string
|
||||
@@ -2495,14 +2269,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-tlsKeyFile string
|
||||
Path to file with TLS key if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated
|
||||
-tlsMinVersion string
|
||||
Optional minimum TLS version to use for incoming requests over HTTPS if -tls is set. Supported values: TLS10, TLS11, TLS12, TLS13
|
||||
-usePromCompatibleNaming
|
||||
Whether to replace characters unsupported by Prometheus with underscores in the ingested metric names and label names. For example, foo.bar{a.b='c'} is transformed into foo_bar{a_b='c'} during data ingestion if this flag is set. See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
-vmalert.proxyURL string
|
||||
Optional URL for proxying requests to vmalert. For example, if -vmalert.proxyURL=http://vmalert:8880 , then alerting API requests such as /api/v1/rules from Grafana will be proxied to http://vmalert:8880/api/v1/rules
|
||||
-vmui.customDashboardsPath string
|
||||
Optional path to vmui dashboards. See https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/app/vmui/packages/vmui/public/dashboards
|
||||
```
|
||||
|
||||
14
SECURITY.md
14
SECURITY.md
@@ -1,14 +0,0 @@
|
||||
# Security Policy
|
||||
|
||||
## Supported Versions
|
||||
|
||||
| Version | Supported |
|
||||
|---------|--------------------|
|
||||
| 1.81.x | :white_check_mark: |
|
||||
| 1.80.x | :x: |
|
||||
| 1.79.x | :white_check_mark: |
|
||||
| < 1.78 | :x: |
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
Please report any security issues to security@victoriametrics.com
|
||||
@@ -29,10 +29,6 @@ var (
|
||||
"equal to -dedup.minScrapeInterval > 0. See https://docs.victoriametrics.com/#deduplication and https://docs.victoriametrics.com/#downsampling")
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check only -promscrape.config and then exit. "+
|
||||
"Unknown config entries aren't allowed in -promscrape.config by default. This can be changed with -promscrape.config.strictParse=false command-line flag")
|
||||
inmemoryDataFlushInterval = flag.Duration("inmemoryDataFlushInterval", 5*time.Second, "The interval for guaranteed saving of in-memory data to disk. "+
|
||||
"The saved data survives unclean shutdown such as OOM crash, hardware reset, SIGKILL, etc. "+
|
||||
"Bigger intervals may help increasing lifetime of flash storage with limited write cycles (e.g. Raspberry PI). "+
|
||||
"Smaller intervals increase disk IO load. Minimum supported value is 1s")
|
||||
)
|
||||
|
||||
func main() {
|
||||
@@ -58,7 +54,6 @@ func main() {
|
||||
logger.Infof("starting VictoriaMetrics at %q...", *httpListenAddr)
|
||||
startTime := time.Now()
|
||||
storage.SetDedupInterval(*minScrapeInterval)
|
||||
storage.SetDataFlushInterval(*inmemoryDataFlushInterval)
|
||||
vmstorage.Init(promql.ResetRollupResultCacheIfNeeded)
|
||||
vmselect.Init()
|
||||
vminsert.Init()
|
||||
@@ -101,8 +96,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
{"vmui", "Web UI"},
|
||||
{"targets", "status for discovered active targets"},
|
||||
{"service-discovery", "labels before and after relabeling for discovered targets"},
|
||||
{"metric-relabel-debug", "debug metric relabeling"},
|
||||
{"expand-with-exprs", "WITH expressions' tutorial"},
|
||||
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
|
||||
{"config", "-promscrape.config contents"},
|
||||
{"metrics", "available service metrics"},
|
||||
|
||||
@@ -138,7 +138,7 @@ func setUp() {
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
_ = resp.Body.Close()
|
||||
resp.Body.Close()
|
||||
return resp.StatusCode == 200
|
||||
}
|
||||
if err := waitFor(testStorageInitTimeout, readyStorageCheckFunc); err != nil {
|
||||
@@ -261,7 +261,7 @@ func testRead(t *testing.T) {
|
||||
for _, q := range test.Query {
|
||||
q = testutil.PopulateTimeTplString(q, insertionTime)
|
||||
if test.Issue != "" {
|
||||
test.Issue = "\nRegression in " + test.Issue
|
||||
test.Issue = "Regression in " + test.Issue
|
||||
}
|
||||
switch true {
|
||||
case strings.HasPrefix(q, "/api/v1/export"):
|
||||
@@ -284,7 +284,7 @@ func testRead(t *testing.T) {
|
||||
queryResult := Query{}
|
||||
httpReadStruct(t, testReadHTTPPath, q, &queryResult)
|
||||
if err := checkQueryResult(queryResult, test.ResultQuery); err != nil {
|
||||
t.Fatalf("Query. %s fails with error: %s.%s", q, err, test.Issue)
|
||||
t.Fatalf("Query. %s fails with error %s.%s", q, err, test.Issue)
|
||||
}
|
||||
default:
|
||||
t.Fatalf("unsupported read query %s", q)
|
||||
@@ -337,9 +337,7 @@ func tcpWrite(t *testing.T, address string, data string) {
|
||||
s := newSuite(t)
|
||||
conn, err := net.Dial("tcp", address)
|
||||
s.noError(err)
|
||||
defer func() {
|
||||
_ = conn.Close()
|
||||
}()
|
||||
defer conn.Close()
|
||||
n, err := conn.Write([]byte(data))
|
||||
s.noError(err)
|
||||
s.equalInt(n, len(data))
|
||||
@@ -350,9 +348,7 @@ func httpReadMetrics(t *testing.T, address, query string) []Metric {
|
||||
s := newSuite(t)
|
||||
resp, err := http.Get(address + query)
|
||||
s.noError(err)
|
||||
defer func() {
|
||||
_ = resp.Body.Close()
|
||||
}()
|
||||
defer resp.Body.Close()
|
||||
s.equalInt(resp.StatusCode, 200)
|
||||
var rows []Metric
|
||||
for dec := json.NewDecoder(resp.Body); dec.More(); {
|
||||
@@ -367,9 +363,7 @@ func httpReadStruct(t *testing.T, address, query string, dst interface{}) {
|
||||
s := newSuite(t)
|
||||
resp, err := http.Get(address + query)
|
||||
s.noError(err)
|
||||
defer func() {
|
||||
_ = resp.Body.Close()
|
||||
}()
|
||||
defer resp.Body.Close()
|
||||
s.equalInt(resp.StatusCode, 200)
|
||||
s.noError(json.NewDecoder(resp.Body).Decode(dst))
|
||||
}
|
||||
|
||||
@@ -85,9 +85,7 @@ func selfScraper(scrapeInterval time.Duration) {
|
||||
mr.Timestamp = currentTimestamp
|
||||
mr.Value = r.Value
|
||||
}
|
||||
if err := vmstorage.AddRows(mrs); err != nil {
|
||||
logger.Errorf("cannot store self-scraped metrics: %s", err)
|
||||
}
|
||||
vmstorage.AddRows(mrs)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ func TestPopulateTimeTplString(t *testing.T) {
|
||||
}
|
||||
f := func(s, resultExpected string) {
|
||||
t.Helper()
|
||||
result := PopulateTimeTplString(s, now.UTC())
|
||||
result := PopulateTimeTplString(s, now)
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected result; got %q; want %q", result, resultExpected)
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
"forms_daily_count;item=x 2 {TIME_S-2m}",
|
||||
"forms_daily_count;item=y 3 {TIME_S-1m}",
|
||||
"forms_daily_count;item=y 4 {TIME_S-2m}"],
|
||||
"query": ["/api/v1/query?query=min%20by%20(item)%20(min_over_time(forms_daily_count[10m:1m]))&time={TIME_S-1m}&latency_offset=1ms"],
|
||||
"query": ["/api/v1/query?query=min%20by%20(item)%20(min_over_time(forms_daily_count[10m:1m]))&time={TIME_S-1m}"],
|
||||
"result_query": {
|
||||
"status":"success",
|
||||
"data":{"resultType":"vector","result":[{"metric":{"item":"x"},"value":["{TIME_S-1m}","2"]},{"metric":{"item":"y"},"value":["{TIME_S-1m}","4"]}]}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -10,6 +10,7 @@ import (
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/csvimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -25,8 +26,10 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return parser.ParseStream(req, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(req, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -27,9 +28,11 @@ func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ce := req.Header.Get("Content-Encoding")
|
||||
return parser.ParseStream(req.Body, ce, func(series []parser.Series) error {
|
||||
return insertRows(at, series, extraLabels)
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
ce := req.Header.Get("Content-Encoding")
|
||||
return parser.ParseStream(req.Body, ce, func(series []parser.Series) error {
|
||||
return insertRows(at, series, extraLabels)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
@@ -49,18 +52,10 @@ func insertRows(at *auth.Token, series []parser.Series, extraLabels []prompbmars
|
||||
Name: "__name__",
|
||||
Value: ss.Metric,
|
||||
})
|
||||
if ss.Host != "" {
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "host",
|
||||
Value: ss.Host,
|
||||
})
|
||||
}
|
||||
if ss.Device != "" {
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "device",
|
||||
Value: ss.Device,
|
||||
})
|
||||
}
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "host",
|
||||
Value: ss.Host,
|
||||
})
|
||||
for _, tag := range ss.Tags {
|
||||
name, value := parser.SplitTag(tag)
|
||||
if name == "host" {
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -19,7 +20,9 @@ var (
|
||||
//
|
||||
// See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
|
||||
func InsertHandler(r io.Reader) error {
|
||||
return parser.ParseStream(r, insertRows)
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(r, insertRows)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(rows []parser.Row) error {
|
||||
|
||||
@@ -16,6 +16,7 @@ import (
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -36,8 +37,10 @@ var (
|
||||
//
|
||||
// See https://github.com/influxdata/telegraf/tree/master/plugins/inputs/socket_listener/
|
||||
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
|
||||
return parser.ParseStream(r, isGzipped, "", "", func(db string, rows []parser.Row) error {
|
||||
return insertRows(nil, db, rows, nil)
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(r, isGzipped, "", "", func(db string, rows []parser.Row) error {
|
||||
return insertRows(nil, db, rows, nil)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
@@ -49,13 +52,15 @@ func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
q := req.URL.Query()
|
||||
precision := q.Get("precision")
|
||||
// Read db tag from https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint
|
||||
db := q.Get("db")
|
||||
return parser.ParseStream(req.Body, isGzipped, precision, db, func(db string, rows []parser.Row) error {
|
||||
return insertRows(at, db, rows, extraLabels)
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
q := req.URL.Query()
|
||||
precision := q.Get("precision")
|
||||
// Read db tag from https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint
|
||||
db := q.Get("db")
|
||||
return parser.ParseStream(req.Body, isGzipped, precision, db, func(db string, rows []parser.Row) error {
|
||||
return insertRows(at, db, rows, extraLabels)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -38,6 +38,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -103,6 +104,7 @@ func main() {
|
||||
startTime := time.Now()
|
||||
remotewrite.Init()
|
||||
common.StartUnmarshalWorkers()
|
||||
writeconcurrencylimiter.Init()
|
||||
if len(*influxListenAddr) > 0 {
|
||||
influxServer = influxserver.MustStart(*influxListenAddr, func(r io.Reader) error {
|
||||
return influx.InsertHandlerForReader(r, false)
|
||||
@@ -205,7 +207,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
httpserver.WriteAPIHelp(w, [][2]string{
|
||||
{"targets", "status for discovered active targets"},
|
||||
{"service-discovery", "labels before and after relabeling for discovered targets"},
|
||||
{"metric-relabel-debug", "debug metric relabeling"},
|
||||
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
|
||||
{"config", "-promscrape.config contents"},
|
||||
{"metrics", "available service metrics"},
|
||||
@@ -216,23 +217,13 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
|
||||
path := strings.Replace(r.URL.Path, "//", "/", -1)
|
||||
if strings.HasPrefix(path, "/prometheus/api/v1/import/prometheus") || strings.HasPrefix(path, "/api/v1/import/prometheus") {
|
||||
prometheusimportRequests.Inc()
|
||||
if err := prometheusimport.InsertHandler(nil, r); err != nil {
|
||||
prometheusimportErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(path, "datadog/") {
|
||||
// Trim suffix from paths starting from /datadog/ in order to support legacy DataDog agent.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2670
|
||||
path = strings.TrimSuffix(path, "/")
|
||||
}
|
||||
switch path {
|
||||
case "/prometheus/api/v1/write", "/api/v1/write":
|
||||
case "/api/v1/write":
|
||||
prometheusWriteRequests.Inc()
|
||||
if err := promremotewrite.InsertHandler(nil, r); err != nil {
|
||||
prometheusWriteErrors.Inc()
|
||||
@@ -241,7 +232,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
case "/prometheus/api/v1/import", "/api/v1/import":
|
||||
case "/api/v1/import":
|
||||
vmimportRequests.Inc()
|
||||
if err := vmimport.InsertHandler(nil, r); err != nil {
|
||||
vmimportErrors.Inc()
|
||||
@@ -250,7 +241,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
case "/prometheus/api/v1/import/csv", "/api/v1/import/csv":
|
||||
case "/api/v1/import/csv":
|
||||
csvimportRequests.Inc()
|
||||
if err := csvimport.InsertHandler(nil, r); err != nil {
|
||||
csvimportErrors.Inc()
|
||||
@@ -259,7 +250,16 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
case "/prometheus/api/v1/import/native", "/api/v1/import/native":
|
||||
case "/api/v1/import/prometheus":
|
||||
prometheusimportRequests.Inc()
|
||||
if err := prometheusimport.InsertHandler(nil, r); err != nil {
|
||||
prometheusimportErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
case "/api/v1/import/native":
|
||||
nativeimportRequests.Inc()
|
||||
if err := native.InsertHandler(nil, r); err != nil {
|
||||
nativeimportErrors.Inc()
|
||||
@@ -268,7 +268,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
case "/influx/write", "/influx/api/v2/write", "/write", "/api/v2/write":
|
||||
case "/write", "/api/v2/write":
|
||||
influxWriteRequests.Inc()
|
||||
if err := influx.InsertHandlerForHTTP(nil, r); err != nil {
|
||||
influxWriteErrors.Inc()
|
||||
@@ -277,7 +277,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
case "/influx/query", "/query":
|
||||
case "/query":
|
||||
influxQueryRequests.Inc()
|
||||
influxutils.WriteDatabaseNames(w)
|
||||
return true
|
||||
@@ -316,29 +316,15 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
fmt.Fprintf(w, `{}`)
|
||||
return true
|
||||
case "/prometheus/targets", "/targets":
|
||||
case "/targets":
|
||||
promscrapeTargetsRequests.Inc()
|
||||
promscrape.WriteHumanReadableTargetsStatus(w, r)
|
||||
return true
|
||||
case "/prometheus/service-discovery", "/service-discovery":
|
||||
case "/service-discovery":
|
||||
promscrapeServiceDiscoveryRequests.Inc()
|
||||
promscrape.WriteServiceDiscovery(w, r)
|
||||
return true
|
||||
case "/prometheus/metric-relabel-debug", "/metric-relabel-debug":
|
||||
promscrapeMetricRelabelDebugRequests.Inc()
|
||||
promscrape.WriteMetricRelabelDebug(w, r)
|
||||
return true
|
||||
case "/prometheus/target-relabel-debug", "/target-relabel-debug":
|
||||
promscrapeTargetRelabelDebugRequests.Inc()
|
||||
promscrape.WriteTargetRelabelDebug(w, r)
|
||||
return true
|
||||
case "/prometheus/api/v1/targets", "/api/v1/targets":
|
||||
promscrapeAPIV1TargetsRequests.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
state := r.FormValue("state")
|
||||
promscrape.WriteAPIV1Targets(w, state)
|
||||
return true
|
||||
case "/prometheus/target_response", "/target_response":
|
||||
case "/target_response":
|
||||
promscrapeTargetResponseRequests.Inc()
|
||||
if err := promscrape.WriteTargetResponse(w, r); err != nil {
|
||||
promscrapeTargetResponseErrors.Inc()
|
||||
@@ -346,17 +332,27 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
return true
|
||||
}
|
||||
return true
|
||||
case "/prometheus/config", "/config":
|
||||
if !httpserver.CheckAuthFlag(w, r, *configAuthKey, "configAuthKey") {
|
||||
case "/config":
|
||||
if *configAuthKey != "" && r.FormValue("authKey") != *configAuthKey {
|
||||
err := &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("The provided authKey doesn't match -configAuthKey"),
|
||||
StatusCode: http.StatusUnauthorized,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
promscrapeConfigRequests.Inc()
|
||||
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
||||
promscrape.WriteConfigData(w)
|
||||
return true
|
||||
case "/prometheus/api/v1/status/config", "/api/v1/status/config":
|
||||
case "/api/v1/status/config":
|
||||
// See https://prometheus.io/docs/prometheus/latest/querying/api/#config
|
||||
if !httpserver.CheckAuthFlag(w, r, *configAuthKey, "configAuthKey") {
|
||||
if *configAuthKey != "" && r.FormValue("authKey") != *configAuthKey {
|
||||
err := &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("The provided authKey doesn't match -configAuthKey"),
|
||||
StatusCode: http.StatusUnauthorized,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
promscrapeStatusConfigRequests.Inc()
|
||||
@@ -365,7 +361,13 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
promscrape.WriteConfigData(&bb)
|
||||
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%q}}`, bb.B)
|
||||
return true
|
||||
case "/prometheus/-/reload", "/-/reload":
|
||||
case "/api/v1/targets":
|
||||
promscrapeAPIV1TargetsRequests.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
state := r.FormValue("state")
|
||||
promscrape.WriteAPIV1Targets(w, state)
|
||||
return true
|
||||
case "/-/reload":
|
||||
promscrapeConfigReloadRequests.Inc()
|
||||
procutil.SelfSIGHUP()
|
||||
w.WriteHeader(http.StatusOK)
|
||||
@@ -407,16 +409,6 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
|
||||
httpserver.Errorf(w, r, "cannot obtain auth token: %s", err)
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(p.Suffix, "prometheus/api/v1/import/prometheus") {
|
||||
prometheusimportRequests.Inc()
|
||||
if err := prometheusimport.InsertHandler(at, r); err != nil {
|
||||
prometheusimportErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(p.Suffix, "datadog/") {
|
||||
// Trim suffix from paths starting from /datadog/ in order to support legacy DataDog agent.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2670
|
||||
@@ -450,6 +442,15 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
case "prometheus/api/v1/import/prometheus":
|
||||
prometheusimportRequests.Inc()
|
||||
if err := prometheusimport.InsertHandler(at, r); err != nil {
|
||||
prometheusimportErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
case "prometheus/api/v1/import/native":
|
||||
nativeimportRequests.Inc()
|
||||
if err := native.InsertHandler(at, r); err != nil {
|
||||
@@ -543,11 +544,7 @@ var (
|
||||
|
||||
promscrapeTargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/targets"}`)
|
||||
promscrapeServiceDiscoveryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/service-discovery"}`)
|
||||
|
||||
promscrapeMetricRelabelDebugRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/metric-relabel-debug"}`)
|
||||
promscrapeTargetRelabelDebugRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/target-relabel-debug"}`)
|
||||
|
||||
promscrapeAPIV1TargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/targets"}`)
|
||||
promscrapeAPIV1TargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/targets"}`)
|
||||
|
||||
promscrapeTargetResponseRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/target_response"}`)
|
||||
promscrapeTargetResponseErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/target_response"}`)
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/native"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -30,8 +31,10 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
return err
|
||||
}
|
||||
isGzip := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return parser.ParseStream(req.Body, isGzip, func(block *parser.Block) error {
|
||||
return insertRows(at, block, extraLabels)
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(req.Body, isGzip, func(block *parser.Block) error {
|
||||
return insertRows(at, block, extraLabels)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -19,7 +20,9 @@ var (
|
||||
//
|
||||
// See http://opentsdb.net/docs/build/html/api_telnet/put.html
|
||||
func InsertHandler(r io.Reader) error {
|
||||
return parser.ParseStream(r, insertRows)
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(r, insertRows)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(rows []parser.Row) error {
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdbhttp"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -24,8 +25,10 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return parser.ParseStream(req, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(req, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -30,17 +31,21 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return parser.ParseStream(req.Body, defaultTimestamp, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
}, nil)
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return parser.ParseStream(req.Body, defaultTimestamp, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
}, nil)
|
||||
})
|
||||
}
|
||||
|
||||
// InsertHandlerForReader processes metrics from given reader with optional gzip format
|
||||
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
|
||||
return parser.ParseStream(r, 0, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(nil, rows, nil)
|
||||
}, nil)
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(r, 0, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(nil, rows, nil)
|
||||
}, nil)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.Label) error {
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -28,15 +29,19 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return parser.ParseStream(req.Body, func(tss []prompb.TimeSeries) error {
|
||||
return insertRows(at, tss, extraLabels)
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(req.Body, func(tss []prompb.TimeSeries) error {
|
||||
return insertRows(at, tss, extraLabels)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
// InsertHandlerForReader processes metrics from given reader
|
||||
func InsertHandlerForReader(at *auth.Token, r io.Reader) error {
|
||||
return parser.ParseStream(r, func(tss []prompb.TimeSeries) error {
|
||||
return insertRows(at, tss, nil)
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(r, func(tss []prompb.TimeSeries) error {
|
||||
return insertRows(at, tss, nil)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -24,46 +24,46 @@ var (
|
||||
"By default the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data "+
|
||||
"is sent after temporary unavailability of the remote storage")
|
||||
sendTimeout = flagutil.NewArrayDuration("remoteWrite.sendTimeout", "Timeout for sending a single block of data to the corresponding -remoteWrite.url")
|
||||
proxyURL = flagutil.NewArrayString("remoteWrite.proxyURL", "Optional proxy URL for writing data to the corresponding -remoteWrite.url. "+
|
||||
proxyURL = flagutil.NewArray("remoteWrite.proxyURL", "Optional proxy URL for writing data to the corresponding -remoteWrite.url. "+
|
||||
"Supported proxies: http, https, socks5. Example: -remoteWrite.proxyURL=socks5://proxy:1234")
|
||||
|
||||
tlsInsecureSkipVerify = flagutil.NewArrayBool("remoteWrite.tlsInsecureSkipVerify", "Whether to skip tls verification when connecting to the corresponding -remoteWrite.url")
|
||||
tlsCertFile = flagutil.NewArrayString("remoteWrite.tlsCertFile", "Optional path to client-side TLS certificate file to use when connecting "+
|
||||
tlsCertFile = flagutil.NewArray("remoteWrite.tlsCertFile", "Optional path to client-side TLS certificate file to use when connecting "+
|
||||
"to the corresponding -remoteWrite.url")
|
||||
tlsKeyFile = flagutil.NewArrayString("remoteWrite.tlsKeyFile", "Optional path to client-side TLS certificate key to use when connecting to the corresponding -remoteWrite.url")
|
||||
tlsCAFile = flagutil.NewArrayString("remoteWrite.tlsCAFile", "Optional path to TLS CA file to use for verifying connections to the corresponding -remoteWrite.url. "+
|
||||
tlsKeyFile = flagutil.NewArray("remoteWrite.tlsKeyFile", "Optional path to client-side TLS certificate key to use when connecting to the corresponding -remoteWrite.url")
|
||||
tlsCAFile = flagutil.NewArray("remoteWrite.tlsCAFile", "Optional path to TLS CA file to use for verifying connections to the corresponding -remoteWrite.url. "+
|
||||
"By default system CA is used")
|
||||
tlsServerName = flagutil.NewArrayString("remoteWrite.tlsServerName", "Optional TLS server name to use for connections to the corresponding -remoteWrite.url. "+
|
||||
tlsServerName = flagutil.NewArray("remoteWrite.tlsServerName", "Optional TLS server name to use for connections to the corresponding -remoteWrite.url. "+
|
||||
"By default the server name from -remoteWrite.url is used")
|
||||
|
||||
headers = flagutil.NewArrayString("remoteWrite.headers", "Optional HTTP headers to send with each request to the corresponding -remoteWrite.url. "+
|
||||
headers = flagutil.NewArray("remoteWrite.headers", "Optional HTTP headers to send with each request to the corresponding -remoteWrite.url. "+
|
||||
"For example, -remoteWrite.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -remoteWrite.url. "+
|
||||
"Multiple headers must be delimited by '^^': -remoteWrite.headers='header1:value1^^header2:value2'")
|
||||
|
||||
basicAuthUsername = flagutil.NewArrayString("remoteWrite.basicAuth.username", "Optional basic auth username to use for the corresponding -remoteWrite.url")
|
||||
basicAuthPassword = flagutil.NewArrayString("remoteWrite.basicAuth.password", "Optional basic auth password to use for the corresponding -remoteWrite.url")
|
||||
basicAuthPasswordFile = flagutil.NewArrayString("remoteWrite.basicAuth.passwordFile", "Optional path to basic auth password to use for the corresponding -remoteWrite.url. "+
|
||||
basicAuthUsername = flagutil.NewArray("remoteWrite.basicAuth.username", "Optional basic auth username to use for the corresponding -remoteWrite.url")
|
||||
basicAuthPassword = flagutil.NewArray("remoteWrite.basicAuth.password", "Optional basic auth password to use for the corresponding -remoteWrite.url")
|
||||
basicAuthPasswordFile = flagutil.NewArray("remoteWrite.basicAuth.passwordFile", "Optional path to basic auth password to use for the corresponding -remoteWrite.url. "+
|
||||
"The file is re-read every second")
|
||||
bearerToken = flagutil.NewArrayString("remoteWrite.bearerToken", "Optional bearer auth token to use for the corresponding -remoteWrite.url")
|
||||
bearerTokenFile = flagutil.NewArrayString("remoteWrite.bearerTokenFile", "Optional path to bearer token file to use for the corresponding -remoteWrite.url. "+
|
||||
bearerToken = flagutil.NewArray("remoteWrite.bearerToken", "Optional bearer auth token to use for the corresponding -remoteWrite.url")
|
||||
bearerTokenFile = flagutil.NewArray("remoteWrite.bearerTokenFile", "Optional path to bearer token file to use for the corresponding -remoteWrite.url. "+
|
||||
"The token is re-read from the file every second")
|
||||
|
||||
oauth2ClientID = flagutil.NewArrayString("remoteWrite.oauth2.clientID", "Optional OAuth2 clientID to use for the corresponding -remoteWrite.url")
|
||||
oauth2ClientSecret = flagutil.NewArrayString("remoteWrite.oauth2.clientSecret", "Optional OAuth2 clientSecret to use for the corresponding -remoteWrite.url")
|
||||
oauth2ClientSecretFile = flagutil.NewArrayString("remoteWrite.oauth2.clientSecretFile", "Optional OAuth2 clientSecretFile to use for the corresponding -remoteWrite.url")
|
||||
oauth2TokenURL = flagutil.NewArrayString("remoteWrite.oauth2.tokenUrl", "Optional OAuth2 tokenURL to use for the corresponding -remoteWrite.url")
|
||||
oauth2Scopes = flagutil.NewArrayString("remoteWrite.oauth2.scopes", "Optional OAuth2 scopes to use for the corresponding -remoteWrite.url. Scopes must be delimited by ';'")
|
||||
oauth2ClientID = flagutil.NewArray("remoteWrite.oauth2.clientID", "Optional OAuth2 clientID to use for the corresponding -remoteWrite.url")
|
||||
oauth2ClientSecret = flagutil.NewArray("remoteWrite.oauth2.clientSecret", "Optional OAuth2 clientSecret to use for the corresponding -remoteWrite.url")
|
||||
oauth2ClientSecretFile = flagutil.NewArray("remoteWrite.oauth2.clientSecretFile", "Optional OAuth2 clientSecretFile to use for the corresponding -remoteWrite.url")
|
||||
oauth2TokenURL = flagutil.NewArray("remoteWrite.oauth2.tokenUrl", "Optional OAuth2 tokenURL to use for the corresponding -remoteWrite.url")
|
||||
oauth2Scopes = flagutil.NewArray("remoteWrite.oauth2.scopes", "Optional OAuth2 scopes to use for the corresponding -remoteWrite.url. Scopes must be delimited by ';'")
|
||||
|
||||
awsUseSigv4 = flagutil.NewArrayBool("remoteWrite.aws.useSigv4", "Enables SigV4 request signing for the corresponding -remoteWrite.url. "+
|
||||
"It is expected that other -remoteWrite.aws.* command-line flags are set if sigv4 request signing is enabled")
|
||||
awsEC2Endpoint = flagutil.NewArrayString("remoteWrite.aws.ec2Endpoint", "Optional AWS EC2 API endpoint to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set")
|
||||
awsSTSEndpoint = flagutil.NewArrayString("remoteWrite.aws.stsEndpoint", "Optional AWS STS API endpoint to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set")
|
||||
awsRegion = flagutil.NewArrayString("remoteWrite.aws.region", "Optional AWS region to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set")
|
||||
awsRoleARN = flagutil.NewArrayString("remoteWrite.aws.roleARN", "Optional AWS roleARN to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set")
|
||||
awsAccessKey = flagutil.NewArrayString("remoteWrite.aws.accessKey", "Optional AWS AccessKey to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set")
|
||||
awsService = flagutil.NewArrayString("remoteWrite.aws.service", "Optional AWS Service to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set. "+
|
||||
awsEC2Endpoint = flagutil.NewArray("remoteWrite.aws.ec2Endpoint", "Optional AWS EC2 API endpoint to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set")
|
||||
awsSTSEndpoint = flagutil.NewArray("remoteWrite.aws.stsEndpoint", "Optional AWS STS API endpoint to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set")
|
||||
awsRegion = flagutil.NewArray("remoteWrite.aws.region", "Optional AWS region to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set")
|
||||
awsRoleARN = flagutil.NewArray("remoteWrite.aws.roleARN", "Optional AWS roleARN to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set")
|
||||
awsAccessKey = flagutil.NewArray("remoteWrite.aws.accessKey", "Optional AWS AccessKey to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set")
|
||||
awsService = flagutil.NewArray("remoteWrite.aws.service", "Optional AWS Service to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set. "+
|
||||
"Defaults to \"aps\"")
|
||||
awsSecretKey = flagutil.NewArrayString("remoteWrite.aws.secretKey", "Optional AWS SecretKey to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set")
|
||||
awsSecretKey = flagutil.NewArray("remoteWrite.aws.secretKey", "Optional AWS SecretKey to use for the corresponding -remoteWrite.url if -remoteWrite.aws.useSigv4 is set")
|
||||
)
|
||||
|
||||
type client struct {
|
||||
|
||||
@@ -195,7 +195,7 @@ func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byt
|
||||
}
|
||||
bb := writeRequestBufPool.Get()
|
||||
bb.B = prompbmarshal.MarshalWriteRequest(bb.B[:0], wr)
|
||||
if len(bb.B) <= maxUnpackedBlockSize.IntN() {
|
||||
if len(bb.B) <= maxUnpackedBlockSize.N {
|
||||
zb := snappyBufPool.Get()
|
||||
zb.B = snappy.Encode(zb.B[:cap(zb.B)], bb.B)
|
||||
writeRequestBufPool.Put(bb)
|
||||
|
||||
@@ -1,62 +0,0 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
func TestPushWriteRequest(t *testing.T) {
|
||||
for _, rowsCount := range []int{1, 10, 100, 1e3, 1e4} {
|
||||
t.Run(fmt.Sprintf("%d", rowsCount), func(t *testing.T) {
|
||||
testPushWriteRequest(t, rowsCount)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func testPushWriteRequest(t *testing.T, rowsCount int) {
|
||||
wr := newTestWriteRequest(rowsCount, 10)
|
||||
pushBlockLen := 0
|
||||
pushBlock := func(block []byte) {
|
||||
if pushBlockLen > 0 {
|
||||
panic(fmt.Errorf("BUG: pushBlock called multiple times; pushBlockLen=%d at first call, len(block)=%d at second call", pushBlockLen, len(block)))
|
||||
}
|
||||
pushBlockLen = len(block)
|
||||
}
|
||||
pushWriteRequest(wr, pushBlock)
|
||||
b := prompbmarshal.MarshalWriteRequest(nil, wr)
|
||||
zb := snappy.Encode(nil, b)
|
||||
maxPushBlockLen := len(zb)
|
||||
minPushBlockLen := maxPushBlockLen / 2
|
||||
if pushBlockLen < minPushBlockLen {
|
||||
t.Fatalf("unexpected block len after pushWriteRequest; got %d bytes; must be at least %d bytes", pushBlockLen, minPushBlockLen)
|
||||
}
|
||||
if pushBlockLen > maxPushBlockLen {
|
||||
t.Fatalf("unexpected block len after pushWriteRequest; got %d bytes; must be smaller or equal to %d bytes", pushBlockLen, maxPushBlockLen)
|
||||
}
|
||||
}
|
||||
|
||||
func newTestWriteRequest(seriesCount, labelsCount int) *prompbmarshal.WriteRequest {
|
||||
var wr prompbmarshal.WriteRequest
|
||||
for i := 0; i < seriesCount; i++ {
|
||||
var labels []prompbmarshal.Label
|
||||
for j := 0; j < labelsCount; j++ {
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: fmt.Sprintf("label_%d_%d", i, j),
|
||||
Value: fmt.Sprintf("value_%d_%d", i, j),
|
||||
})
|
||||
}
|
||||
wr.Timeseries = append(wr.Timeseries, prompbmarshal.TimeSeries{
|
||||
Labels: labels,
|
||||
Samples: []prompbmarshal.Sample{
|
||||
{
|
||||
Value: float64(i),
|
||||
Timestamp: 1000 * int64(i),
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
return &wr
|
||||
}
|
||||
@@ -1,36 +0,0 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/golang/snappy"
|
||||
"github.com/klauspost/compress/s2"
|
||||
)
|
||||
|
||||
func BenchmarkCompressWriteRequestSnappy(b *testing.B) {
|
||||
b.Run("snappy", func(b *testing.B) {
|
||||
benchmarkCompressWriteRequest(b, snappy.Encode)
|
||||
})
|
||||
b.Run("s2", func(b *testing.B) {
|
||||
benchmarkCompressWriteRequest(b, s2.EncodeSnappy)
|
||||
})
|
||||
}
|
||||
|
||||
func benchmarkCompressWriteRequest(b *testing.B, compressFunc func(dst, src []byte) []byte) {
|
||||
for _, rowsCount := range []int{1, 10, 100, 1e3, 1e4} {
|
||||
b.Run(fmt.Sprintf("rows_%d", rowsCount), func(b *testing.B) {
|
||||
wr := newTestWriteRequest(rowsCount, 10)
|
||||
data := prompbmarshal.MarshalWriteRequest(nil, wr)
|
||||
b.ReportAllocs()
|
||||
b.SetBytes(int64(rowsCount))
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
var zb []byte
|
||||
for pb.Next() {
|
||||
zb = compressFunc(zb[:cap(zb)], data)
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -13,19 +13,18 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. "+
|
||||
unparsedLabelsGlobal = flagutil.NewArray("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. "+
|
||||
"Pass multiple -remoteWrite.label flags in order to add multiple labels to metrics before sending them to remote storage")
|
||||
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabeling configs, which are applied "+
|
||||
"to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. "+
|
||||
"The path can point either to local file or to http url. "+
|
||||
"See https://docs.victoriametrics.com/vmagent.html#relabeling")
|
||||
relabelConfigPaths = flagutil.NewArrayString("remoteWrite.urlRelabelConfig", "Optional path to relabel configs for the corresponding -remoteWrite.url. "+
|
||||
"See also -remoteWrite.relabelConfig. The path can point either to local file or to http url. "+
|
||||
"See https://docs.victoriametrics.com/vmagent.html#relabeling")
|
||||
|
||||
usePromCompatibleNaming = flag.Bool("usePromCompatibleNaming", false, "Whether to replace characters unsupported by Prometheus with underscores "+
|
||||
"in the ingested metric names and label names. For example, foo.bar{a.b='c'} is transformed into foo_bar{a_b='c'} during data ingestion if this flag is set. "+
|
||||
"See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels")
|
||||
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabel_config entries. "+
|
||||
"The path can point either to local file or to http url. These entries are applied to all the metrics "+
|
||||
"before sending them to -remoteWrite.url. See https://docs.victoriametrics.com/vmagent.html#relabeling for details")
|
||||
relabelDebugGlobal = flag.Bool("remoteWrite.relabelDebug", false, "Whether to log metrics before and after relabeling with -remoteWrite.relabelConfig. "+
|
||||
"If the -remoteWrite.relabelDebug is enabled, then the metrics aren't sent to remote storage. This is useful for debugging the relabeling configs")
|
||||
relabelConfigPaths = flagutil.NewArray("remoteWrite.urlRelabelConfig", "Optional path to relabel config for the corresponding -remoteWrite.url. "+
|
||||
"The path can point either to local file or to http url")
|
||||
relabelDebug = flagutil.NewArrayBool("remoteWrite.urlRelabelDebug", "Whether to log metrics before and after relabeling with -remoteWrite.urlRelabelConfig. "+
|
||||
"If the -remoteWrite.urlRelabelDebug is enabled, then the metrics aren't sent to the corresponding -remoteWrite.url. "+
|
||||
"This is useful for debugging the relabeling configs")
|
||||
)
|
||||
|
||||
var labelsGlobal []prompbmarshal.Label
|
||||
@@ -39,7 +38,7 @@ func CheckRelabelConfigs() error {
|
||||
func loadRelabelConfigs() (*relabelConfigs, error) {
|
||||
var rcs relabelConfigs
|
||||
if *relabelConfigPathGlobal != "" {
|
||||
global, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
|
||||
global, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal, *relabelDebugGlobal)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %w", *relabelConfigPathGlobal, err)
|
||||
}
|
||||
@@ -55,7 +54,7 @@ func loadRelabelConfigs() (*relabelConfigs, error) {
|
||||
// Skip empty relabel config.
|
||||
continue
|
||||
}
|
||||
prc, err := promrelabel.LoadRelabelConfigs(path)
|
||||
prc, err := promrelabel.LoadRelabelConfigs(path, relabelDebug.GetOptionalArg(i))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %w", path, err)
|
||||
}
|
||||
@@ -88,7 +87,7 @@ func initLabelsGlobal() {
|
||||
}
|
||||
|
||||
func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, extraLabels []prompbmarshal.Label, pcs *promrelabel.ParsedConfigs) []prompbmarshal.TimeSeries {
|
||||
if len(extraLabels) == 0 && pcs.Len() == 0 && !*usePromCompatibleNaming {
|
||||
if len(extraLabels) == 0 && pcs.Len() == 0 {
|
||||
// Nothing to change.
|
||||
return tss
|
||||
}
|
||||
@@ -108,20 +107,7 @@ func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, extraLab
|
||||
labels = append(labels, *extraLabel)
|
||||
}
|
||||
}
|
||||
if *usePromCompatibleNaming {
|
||||
// Replace unsupported Prometheus chars in label names and metric names with underscores.
|
||||
tmpLabels := labels[labelsLen:]
|
||||
for j := range tmpLabels {
|
||||
label := &tmpLabels[j]
|
||||
if label.Name == "__name__" {
|
||||
label.Value = promrelabel.SanitizeName(label.Value)
|
||||
} else {
|
||||
label.Name = promrelabel.SanitizeName(label.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
labels = pcs.Apply(labels, labelsLen)
|
||||
labels = promrelabel.FinalizeLabels(labels[:labelsLen], labels[labelsLen:])
|
||||
labels = pcs.Apply(labels, labelsLen, true)
|
||||
if len(labels) == labelsLen {
|
||||
// Drop the current time series, since relabeling removed all the labels.
|
||||
continue
|
||||
|
||||
@@ -1,49 +0,0 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
)
|
||||
|
||||
func TestApplyRelabeling(t *testing.T) {
|
||||
f := func(extraLabels []prompbmarshal.Label, pcs *promrelabel.ParsedConfigs, sTss, sExpTss string) {
|
||||
rctx := &relabelCtx{}
|
||||
tss, expTss := parseSeries(sTss), parseSeries(sExpTss)
|
||||
gotTss := rctx.applyRelabeling(tss, extraLabels, pcs)
|
||||
if !reflect.DeepEqual(gotTss, expTss) {
|
||||
t.Fatalf("expected to have: \n%v;\ngot: \n%v", expTss, gotTss)
|
||||
}
|
||||
}
|
||||
|
||||
f(nil, nil, "up", "up")
|
||||
f([]prompbmarshal.Label{{Name: "foo", Value: "bar"}}, nil, "up", `up{foo="bar"}`)
|
||||
f([]prompbmarshal.Label{{Name: "foo", Value: "bar"}}, nil, `up{foo="baz"}`, `up{foo="bar"}`)
|
||||
|
||||
pcs, err := promrelabel.ParseRelabelConfigsData([]byte(`
|
||||
- target_label: "foo"
|
||||
replacement: "aaa"
|
||||
- action: labeldrop
|
||||
regex: "env.*"
|
||||
`))
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
f(nil, pcs, `up{foo="baz", env="prod"}`, `up{foo="aaa"}`)
|
||||
|
||||
oldVal := *usePromCompatibleNaming
|
||||
*usePromCompatibleNaming = true
|
||||
f(nil, nil, `foo.bar`, `foo_bar`)
|
||||
*usePromCompatibleNaming = oldVal
|
||||
}
|
||||
|
||||
func parseSeries(data string) []prompbmarshal.TimeSeries {
|
||||
var tss []prompbmarshal.TimeSeries
|
||||
tss = append(tss, prompbmarshal.TimeSeries{
|
||||
Labels: promutils.MustNewLabelsFromString(data).GetLabels(),
|
||||
})
|
||||
return tss
|
||||
}
|
||||
@@ -13,7 +13,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bloomfilter"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
@@ -21,17 +20,16 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/cespare/xxhash/v2"
|
||||
)
|
||||
|
||||
var (
|
||||
remoteWriteURLs = flagutil.NewArrayString("remoteWrite.url", "Remote storage URL to write data to. It must support Prometheus remote_write API. "+
|
||||
remoteWriteURLs = flagutil.NewArray("remoteWrite.url", "Remote storage URL to write data to. It must support Prometheus remote_write API. "+
|
||||
"It is recommended using VictoriaMetrics as remote storage. Example url: http://<victoriametrics-host>:8428/api/v1/write . "+
|
||||
"Pass multiple -remoteWrite.url flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.multitenantURL")
|
||||
remoteWriteMultitenantURLs = flagutil.NewArrayString("remoteWrite.multitenantURL", "Base path for multitenant remote storage URL to write data to. "+
|
||||
remoteWriteMultitenantURLs = flagutil.NewArray("remoteWrite.multitenantURL", "Base path for multitenant remote storage URL to write data to. "+
|
||||
"See https://docs.victoriametrics.com/vmagent.html#multitenancy for details. Example url: http://<vminsert>:8480 . "+
|
||||
"Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.url")
|
||||
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory where temporary data for remote write component is stored. "+
|
||||
@@ -40,7 +38,7 @@ var (
|
||||
"isn't enough for sending high volume of collected data to remote storage. Default value is 2 * numberOfAvailableCPUs")
|
||||
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
|
||||
"It is hidden by default, since it can contain sensitive info such as auth key")
|
||||
maxPendingBytesPerURL = flagutil.NewArrayBytes("remoteWrite.maxDiskUsagePerURL", "The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath "+
|
||||
maxPendingBytesPerURL = flagutil.NewBytes("remoteWrite.maxDiskUsagePerURL", 0, "The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath "+
|
||||
"for each -remoteWrite.url. When buffer size reaches the configured maximum, then old data is dropped when adding new data to the buffer. "+
|
||||
"Buffered data is stored in ~500MB chunks, so the minimum practical value for this flag is 500MB. "+
|
||||
"Disk usage is unlimited if the value is set to 0")
|
||||
@@ -59,13 +57,6 @@ var (
|
||||
"Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter")
|
||||
maxDailySeries = flag.Int("remoteWrite.maxDailySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. "+
|
||||
"Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter")
|
||||
|
||||
streamAggrConfig = flagutil.NewArrayString("remoteWrite.streamAggr.config", "Optional path to file with stream aggregation config. "+
|
||||
"See https://docs.victoriametrics.com/stream-aggregation.html . "+
|
||||
"See also -remoteWrite.streamAggr.keepInput")
|
||||
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep input samples after the aggregation with -remoteWrite.streamAggr.config. "+
|
||||
"By default the input is dropped after the aggregation, so only the aggregate data is sent to the -remoteWrite.url. "+
|
||||
"See https://docs.victoriametrics.com/stream-aggregation.html")
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -149,9 +140,6 @@ func Init() {
|
||||
}
|
||||
allRelabelConfigs.Store(rcs)
|
||||
|
||||
configSuccess.Set(1)
|
||||
configTimestamp.Set(fasttime.UnixTimestamp())
|
||||
|
||||
if len(*remoteWriteURLs) > 0 {
|
||||
rwctxsDefault = newRemoteWriteCtxs(nil, *remoteWriteURLs)
|
||||
}
|
||||
@@ -166,31 +154,18 @@ func Init() {
|
||||
case <-stopCh:
|
||||
return
|
||||
}
|
||||
configReloads.Inc()
|
||||
logger.Infof("SIGHUP received; reloading relabel configs pointed by -remoteWrite.relabelConfig and -remoteWrite.urlRelabelConfig")
|
||||
rcs, err := loadRelabelConfigs()
|
||||
if err != nil {
|
||||
configReloadErrors.Inc()
|
||||
configSuccess.Set(0)
|
||||
logger.Errorf("cannot reload relabel configs; preserving the previous configs; error: %s", err)
|
||||
continue
|
||||
}
|
||||
|
||||
allRelabelConfigs.Store(rcs)
|
||||
configSuccess.Set(1)
|
||||
configTimestamp.Set(fasttime.UnixTimestamp())
|
||||
logger.Infof("Successfully reloaded relabel configs")
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
var (
|
||||
configReloads = metrics.NewCounter(`vmagent_relabel_config_reloads_total`)
|
||||
configReloadErrors = metrics.NewCounter(`vmagent_relabel_config_reloads_errors_total`)
|
||||
configSuccess = metrics.NewCounter(`vmagent_relabel_config_last_reload_successful`)
|
||||
configTimestamp = metrics.NewCounter(`vmagent_relabel_config_last_reload_success_timestamp_seconds`)
|
||||
)
|
||||
|
||||
func newRemoteWriteCtxs(at *auth.Token, urls []string) []*remoteWriteCtx {
|
||||
if len(urls) == 0 {
|
||||
logger.Panicf("BUG: urls must be non-empty")
|
||||
@@ -260,7 +235,7 @@ func Stop() {
|
||||
// Push sends wr to remote storage systems set via `-remoteWrite.url`.
|
||||
//
|
||||
// If at is nil, then the data is pushed to the configured `-remoteWrite.url`.
|
||||
// If at isn't nil, the data is pushed to the configured `-remoteWrite.multitenantURL`.
|
||||
// If at isn't nil, the the data is pushed to the configured `-remoteWrite.multitenantURL`.
|
||||
//
|
||||
// Note that wr may be modified by Push due to relabeling and rounding.
|
||||
func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
|
||||
@@ -444,13 +419,9 @@ var (
|
||||
)
|
||||
|
||||
type remoteWriteCtx struct {
|
||||
idx int
|
||||
fq *persistentqueue.FastQueue
|
||||
c *client
|
||||
|
||||
sas *streamaggr.Aggregators
|
||||
streamAggrKeepInput bool
|
||||
|
||||
idx int
|
||||
fq *persistentqueue.FastQueue
|
||||
c *client
|
||||
pss []*pendingSeries
|
||||
pssNextIdx uint64
|
||||
|
||||
@@ -465,8 +436,7 @@ func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxI
|
||||
pqURL.Fragment = ""
|
||||
h := xxhash.Sum64([]byte(pqURL.String()))
|
||||
queuePath := fmt.Sprintf("%s/persistent-queue/%d_%016X", *tmpDataPath, argIdx+1, h)
|
||||
maxPendingBytes := maxPendingBytesPerURL.GetOptionalArgOrDefault(argIdx, 0)
|
||||
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes)
|
||||
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytesPerURL.N)
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
return float64(fq.GetPendingBytes())
|
||||
})
|
||||
@@ -482,7 +452,6 @@ func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxI
|
||||
}
|
||||
c.init(argIdx, *queues, sanitizedURL)
|
||||
|
||||
// Initialize pss
|
||||
sf := significantFigures.GetOptionalArgOrDefault(argIdx, 0)
|
||||
rd := roundDigits.GetOptionalArgOrDefault(argIdx, 100)
|
||||
pssLen := *queues
|
||||
@@ -495,8 +464,7 @@ func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxI
|
||||
for i := range pss {
|
||||
pss[i] = newPendingSeries(fq.MustWriteBlock, sf, rd)
|
||||
}
|
||||
|
||||
rwctx := &remoteWriteCtx{
|
||||
return &remoteWriteCtx{
|
||||
idx: argIdx,
|
||||
fq: fq,
|
||||
c: c,
|
||||
@@ -505,19 +473,6 @@ func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxI
|
||||
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_rows_pushed_after_relabel_total{path=%q, url=%q}`, queuePath, sanitizedURL)),
|
||||
rowsDroppedByRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_relabel_metrics_dropped_total{path=%q, url=%q}`, queuePath, sanitizedURL)),
|
||||
}
|
||||
|
||||
// Initialize sas
|
||||
sasFile := streamAggrConfig.GetOptionalArg(argIdx)
|
||||
if sasFile != "" {
|
||||
sas, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternal)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot initialize stream aggregators from -remoteWrite.streamAggrFile=%q: %s", sasFile, err)
|
||||
}
|
||||
rwctx.sas = sas
|
||||
rwctx.streamAggrKeepInput = streamAggrKeepInput.GetOptionalArg(argIdx)
|
||||
}
|
||||
|
||||
return rwctx
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) MustStop() {
|
||||
@@ -529,8 +484,6 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
rwctx.fq.UnblockAllReaders()
|
||||
rwctx.c.MustStop()
|
||||
rwctx.c = nil
|
||||
rwctx.sas.MustStop()
|
||||
rwctx.sas = nil
|
||||
rwctx.fq.MustClose()
|
||||
rwctx.fq = nil
|
||||
|
||||
@@ -539,7 +492,6 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
// Apply relabeling
|
||||
var rctx *relabelCtx
|
||||
var v *[]prompbmarshal.TimeSeries
|
||||
rcs := allRelabelConfigs.Load().(*relabelConfigs)
|
||||
@@ -557,17 +509,11 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
rowsCountAfterRelabel := getRowsCount(tss)
|
||||
rwctx.rowsDroppedByRelabel.Add(rowsCountBeforeRelabel - rowsCountAfterRelabel)
|
||||
}
|
||||
pss := rwctx.pss
|
||||
idx := atomic.AddUint64(&rwctx.pssNextIdx, 1) % uint64(len(pss))
|
||||
rowsCount := getRowsCount(tss)
|
||||
rwctx.rowsPushedAfterRelabel.Add(rowsCount)
|
||||
|
||||
// Apply stream aggregation if any
|
||||
rwctx.sas.Push(tss)
|
||||
if rwctx.sas == nil || rwctx.streamAggrKeepInput {
|
||||
// Push samples to the remote storage
|
||||
rwctx.pushInternal(tss)
|
||||
}
|
||||
|
||||
// Return back relabeling contexts to the pool
|
||||
pss[idx].Push(tss)
|
||||
if rctx != nil {
|
||||
*v = prompbmarshal.ResetTimeSeries(tss)
|
||||
tssRelabelPool.Put(v)
|
||||
@@ -575,12 +521,6 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
}
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) pushInternal(tss []prompbmarshal.TimeSeries) {
|
||||
pss := rwctx.pss
|
||||
idx := atomic.AddUint64(&rwctx.pssNextIdx, 1) % uint64(len(pss))
|
||||
pss[idx].Push(tss)
|
||||
}
|
||||
|
||||
var tssRelabelPool = &sync.Pool{
|
||||
New: func() interface{} {
|
||||
a := []prompbmarshal.TimeSeries{}
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/vmimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -30,16 +31,20 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return parser.ParseStream(req.Body, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
|
||||
return parser.ParseStream(req.Body, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
// InsertHandlerForReader processes metrics from given reader
|
||||
func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
|
||||
return parser.ParseStream(r, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(nil, rows, nil)
|
||||
return writeconcurrencylimiter.Do(func() error {
|
||||
return parser.ParseStream(r, isGzipped, func(rows []parser.Row) error {
|
||||
return insertRows(nil, rows, nil)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -69,17 +69,16 @@ Then configure `vmalert` accordingly:
|
||||
-external.label=replica=a # Multiple external labels may be set
|
||||
```
|
||||
|
||||
Note there's a separate `-remoteWrite.url` command-line flag to allow writing results of
|
||||
Note there's a separate `remoteWrite.url` to allow writing results of
|
||||
alerting/recording rules into a different storage than the initial data that's
|
||||
queried. This allows using `vmalert` to aggregate data from a short-term,
|
||||
high-frequency, high-cardinality storage into a long-term storage with
|
||||
decreased cardinality and a bigger interval between samples.
|
||||
See also [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html).
|
||||
|
||||
See the full list of configuration flags in [configuration](#configuration) section.
|
||||
|
||||
If you run multiple `vmalert` services for the same datastore or AlertManager - do not forget
|
||||
to specify different `-external.label` command-line flags in order to define which `vmalert` generated rules or alerts.
|
||||
to specify different `external.label` flags in order to define which `vmalert` generated rules or alerts.
|
||||
|
||||
Configuration for [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/)
|
||||
and [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) rules is very
|
||||
@@ -121,7 +120,7 @@ name: <string>
|
||||
# params:
|
||||
# nocache: ["1"] # disable caching for vmselect
|
||||
# denyPartialResponse: ["true"] # fail if one or more vmstorage nodes returned an error
|
||||
# extra_label: ["env=dev"] # apply additional label filter "env=dev" for all requests
|
||||
# extra_label: ["env=dev"] # apply additional label filter "env=dev" for all requests
|
||||
# see more details at https://docs.victoriametrics.com#prometheus-querying-api-enhancements
|
||||
params:
|
||||
[ <string>: [<string>, ...]]
|
||||
@@ -132,7 +131,7 @@ params:
|
||||
# headers:
|
||||
# - "CustomHeader: foo"
|
||||
# - "CustomHeader2: bar"
|
||||
# Headers set via this param have priority over headers set via `-datasource.headers` flag.
|
||||
# Headers set via this param have priority over headers set via `-datasource.headers` flag.
|
||||
headers:
|
||||
[ <string>, ...]
|
||||
|
||||
@@ -185,18 +184,6 @@ expr: <string>
|
||||
# as firing once they return.
|
||||
[ for: <duration> | default = 0s ]
|
||||
|
||||
# Whether to print debug information into logs.
|
||||
# Information includes alerts state changes and requests sent to the datasource.
|
||||
# Please note, that if rule's query params contain sensitive
|
||||
# information - it will be printed to logs.
|
||||
# Is applicable to alerting rules only.
|
||||
[ debug: <bool> | default = false ]
|
||||
|
||||
# Defines the number of rule's updates entries stored in memory
|
||||
# and available for view on rule's Details page.
|
||||
# Overrides `rule.updateEntriesLimit` value for this specific rule.
|
||||
[ update_entries_limit: <integer> | default 0 ]
|
||||
|
||||
# Labels to add or overwrite for each alert.
|
||||
labels:
|
||||
[ <labelname>: <tmpl_string> ]
|
||||
@@ -206,9 +193,9 @@ annotations:
|
||||
[ <labelname>: <tmpl_string> ]
|
||||
```
|
||||
|
||||
#### Templating
|
||||
#### Templating
|
||||
|
||||
It is allowed to use [Go templating](https://golang.org/pkg/text/template/) in annotations to format data, iterate over
|
||||
It is allowed to use [Go templating](https://golang.org/pkg/text/template/) in annotations to format data, iterate over
|
||||
or execute expressions.
|
||||
The following variables are available in templating:
|
||||
|
||||
@@ -219,57 +206,18 @@ The following variables are available in templating:
|
||||
| $labels or .Labels | The list of labels of the current alert. Use as ".Labels.<label_name>". | {% raw %}Too high number of connections for {{ .Labels.instance }}{% endraw %} |
|
||||
| $alertID or .AlertID | The current alert's ID generated by vmalert. | {% raw %}Link: vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}{% endraw %} |
|
||||
| $groupID or .GroupID | The current alert's group ID generated by vmalert. | {% raw %}Link: vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}{% endraw %} |
|
||||
| $expr or .Expr | Alert's expression. Can be used for generating links to Grafana or other systems. | {% raw %}/api/v1/query?query={{ $expr|queryEscape }}{% endraw %} |
|
||||
| $for or .For | Alert's configured for param. | {% raw %}Number of connections is too high for more than {{ .For }}{% endraw %} |
|
||||
| $expr or .Expr | Alert's expression. Can be used for generating links to Grafana or other systems. | {% raw %}/api/v1/query?query={{ $expr|quotesEscape|queryEscape }}{% endraw %} |
|
||||
| $externalLabels or .ExternalLabels | List of labels configured via `-external.label` command-line flag. | {% raw %}Issues with {{ $labels.instance }} (datacenter-{{ $externalLabels.dc }}){% endraw %} |
|
||||
| $externalURL or .ExternalURL | URL configured via `-external.url` command-line flag. Used for cases when vmalert is hidden behind proxy. | {% raw %}Visit {{ $externalURL }} for more details{% endraw %} |
|
||||
|
||||
Additionally, `vmalert` provides some extra templating functions listed [here](#template-functions) and [reusable templates](#reusable-templates).
|
||||
|
||||
#### Template functions
|
||||
|
||||
`vmalert` provides the following template functions, which can be used during [templating](#templating):
|
||||
|
||||
- `args arg0 ... argN` - converts the input args into a map with `arg0`, ..., `argN` keys.
|
||||
- `externalURL` - returns the value of `-external.url` command-line flag.
|
||||
- `first` - returns the first result from the input query results returned by `query` function.
|
||||
- `htmlEscape` - escapes special chars in input string, so it can be safely embedded as a plaintext into HTML.
|
||||
- `humanize` - converts the input number into human-readable format by adding [metric prefixes](https://en.wikipedia.org/wiki/Metric_prefix).
|
||||
For example, `100000` is converted into `100K`.
|
||||
- `humanize1024` - converts the input number into human-readable format with 1024 base.
|
||||
For example, `1024` is converted into 1ki`.
|
||||
- `humanizeDuration` - converts the input number in seconds into human-readable duration.
|
||||
- `humanizePercentage` - converts the input number to percentage. For example, `0.123` is converted into `12.3%`.
|
||||
- `humanizeTimestamp` - converts the input unix timestamp into human-readable time.
|
||||
- `jsonEscape` - JSON-encodes the input string.
|
||||
- `label name` - returns the value of the label with the given `name` from the input query result.
|
||||
- `match regex` - matches the input string against the provided `regex`.
|
||||
- `parseDuration` - parses the input string into duration in seconds. For example, `1h` is parsed into `3600`.
|
||||
- `parseDurationTime` - parses the input string into [time.Duration](https://pkg.go.dev/time#Duration).
|
||||
- `pathEscape` - escapes the input string, so it can be safely put inside path part of URL.
|
||||
- `pathPrefix` - returns the path part of the `-external.url` command-line flag.
|
||||
- `query` - executes the [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html) query against `-datasource.url` and returns the query result.
|
||||
For example, {% raw %}`{{ query "sort_desc(process_resident_memory_bytes)" | first | value }}`{% endraw %} executes the `sort_desc(process_resident_memory_bytes)`
|
||||
query at `-datasource.url` and returns the first result.
|
||||
- `queryEscape` - escapes the input string, so it can be safely put inside [query arg](https://en.wikipedia.org/wiki/Percent-encoding) part of URL.
|
||||
- `quotesEscape` - escapes the input string, so it can be safely embedded into JSON string.
|
||||
- `reReplaceAll regex repl` - replaces all the occurences of the `regex` in input string with the `repl`.
|
||||
- `safeHtml` - marks the input string as safe to use in HTML context without the need to html-escape it.
|
||||
- `sortByLabel name` - sorts the input query results by the label with the given `name`.
|
||||
- `stripDomain` - leaves the first part of the domain. For example, `foo.bar.baz` is converted to `foo`.
|
||||
The port part is left in the output string. E.g. `foo.bar:1234` is converted into `foo:1234`.
|
||||
- `stripPort` - strips `port` part from `host:port` input string.
|
||||
- `strvalue` - returns the metric name from the input query result.
|
||||
- `title` - converts the first letters of every input word to uppercase.
|
||||
- `toLower` - converts all the chars in the input string to lowercase.
|
||||
- `toTime` - converts the input unix timestamp to [time.Time](https://pkg.go.dev/time#Time).
|
||||
- `toUpper` - converts all the chars in the input string to uppercase.
|
||||
- `value` - returns the numeric value from the input query result.
|
||||
Additionally, `vmalert` provides some extra templating functions
|
||||
listed [here](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/templates/template.go)
|
||||
and [reusable templates](#reusable-templates).
|
||||
|
||||
#### Reusable templates
|
||||
|
||||
Like in Alertmanager you can define [reusable templates](https://prometheus.io/docs/prometheus/latest/configuration/template_examples/#defining-reusable-templates)
|
||||
to share same templates across annotations. Just define the templates in a file and
|
||||
to share same templates across annotations. Just define the templates in a file and
|
||||
set the path via `-rule.templates` flag.
|
||||
|
||||
For example, template `grafana.filter` can be defined as following:
|
||||
@@ -325,12 +273,6 @@ expr: <string>
|
||||
# Labels to add or overwrite before storing the result.
|
||||
labels:
|
||||
[ <labelname>: <labelvalue> ]
|
||||
|
||||
|
||||
# Defines the number of rule's updates entries stored in memory
|
||||
# and available for view on rule's Details page.
|
||||
# Overrides `rule.updateEntriesLimit` value for this specific rule.
|
||||
[ update_entries_limit: <integer> | default 0 ]
|
||||
```
|
||||
|
||||
For recording rules to work `-remoteWrite.url` must be specified.
|
||||
@@ -366,7 +308,7 @@ There are the following approaches exist for alerting and recording rules across
|
||||
rules to `AccountID=123`.
|
||||
|
||||
* To specify `tenant` parameter per each alerting and recording group if
|
||||
[enterprise version of vmalert](https://docs.victoriametrics.com/enterprise.html) is used
|
||||
[enterprise version of vmalert](https://victoriametrics.com/products/enterprise/) is used
|
||||
with `-clusterMode` command-line flag. For example:
|
||||
|
||||
```yaml
|
||||
@@ -382,10 +324,6 @@ groups:
|
||||
# Rules for accountID=456, projectID=789
|
||||
```
|
||||
|
||||
The results of alerting and recording rules contain `vm_account_id` and `vm_project_id` labels
|
||||
if `-clusterMode` is enabled. These labels can be used during [templating](https://docs.victoriametrics.com/vmalert.html#templating),
|
||||
and help to identify to which account or project the triggered alert or produced recording belongs.
|
||||
|
||||
If `-clusterMode` is enabled, then `-datasource.url`, `-remoteRead.url` and `-remoteWrite.url` must
|
||||
contain only the hostname without tenant id. For example: `-datasource.url=http://vmselect:8481`.
|
||||
`vmalert` automatically adds the specified tenant to urls per each recording rule in this case.
|
||||
@@ -473,16 +411,15 @@ To avoid recording rules results and alerts state duplication in VictoriaMetrics
|
||||
don't forget to configure [deduplication](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#deduplication).
|
||||
The recommended value for `-dedup.minScrapeInterval` must be greater or equal to vmalert's `evaluation_interval`.
|
||||
If you observe inconsistent or "jumping" values in series produced by vmalert, try disabling `-datasource.queryTimeAlignment`
|
||||
command line flag. Because of alignment, two or more vmalert HA pairs will produce results with the same timestamps.
|
||||
But due of backfilling (data delivered to the datasource with some delay) values of such results may differ,
|
||||
command line flag. Because of alignment, two or more vmalert HA pairs will produce results with the same timestamps.
|
||||
But due of backfilling (data delivered to the datasource with some delay) values of such results may differ,
|
||||
which would affect deduplication logic and result into "jumping" datapoints.
|
||||
|
||||
Alertmanager will automatically deduplicate alerts with identical labels, so ensure that
|
||||
all `vmalert`s are having the same config.
|
||||
|
||||
Don't forget to configure [cluster mode](https://prometheus.io/docs/alerting/latest/alertmanager/)
|
||||
for Alertmanagers for better reliability. List all Alertmanager URLs in vmalert's `-notifier.url`
|
||||
to ensure [high availability](https://github.com/prometheus/alertmanager#high-availability).
|
||||
for Alertmanagers for better reliability.
|
||||
|
||||
This example uses single-node VM server for the sake of simplicity.
|
||||
Check how to replace it with [cluster VictoriaMetrics](#cluster-victoriametrics) if needed.
|
||||
@@ -495,8 +432,8 @@ This ability allows to aggregate data. For example, the following rule will calc
|
||||
metric `http_requests` on the `5m` interval:
|
||||
|
||||
```yaml
|
||||
- record: http_requests:avg5m
|
||||
expr: avg_over_time(http_requests[5m])
|
||||
- record: http_requests:avg5m
|
||||
expr: avg_over_time(http_requests[5m])
|
||||
```
|
||||
|
||||
Every time this rule will be evaluated, `vmalert` will backfill its results as a new time series `http_requests:avg5m`
|
||||
@@ -510,13 +447,13 @@ This ability allows to downsample data. For example, the following config will e
|
||||
groups:
|
||||
- name: my_group
|
||||
interval: 5m
|
||||
rules:
|
||||
rules:
|
||||
- record: http_requests:avg5m
|
||||
expr: avg_over_time(http_requests[5m])
|
||||
expr: avg_over_time(http_requests[5m])
|
||||
```
|
||||
|
||||
Ability of `vmalert` to be configured with different `-datasource.url` and `-remoteWrite.url` command-line flags
|
||||
allows reading data from one data source and backfilling results to another. This helps to build a system
|
||||
Ability of `vmalert` to be configured with different `datasource.url` and `remoteWrite.url` allows
|
||||
reading data from one data source and backfilling results to another. This helps to build a system
|
||||
for aggregating and downsampling the data.
|
||||
|
||||
The following example shows how to build a topology where `vmalert` will process data from one cluster
|
||||
@@ -530,7 +467,7 @@ or reducing resolution) and push results to "cold" cluster.
|
||||
|
||||
```
|
||||
./bin/vmalert -rule=downsampling-rules.yml \ # Path to the file with rules configuration. Supports wildcard
|
||||
-datasource.url=http://raw-cluster-vmselect:8481/select/0/prometheus # vmselect addr for executing recording rules expressions
|
||||
-datasource.url=http://raw-cluster-vmselect:8481/select/0/prometheus # vmselect addr for executing recordi ng rules expressions
|
||||
-remoteWrite.url=http://aggregated-cluster-vminsert:8480/insert/0/prometheus # vminsert addr to persist recording rules results
|
||||
```
|
||||
|
||||
@@ -540,7 +477,7 @@ Please note, [replay](#rules-backfilling) feature may be used for transforming h
|
||||
|
||||
Flags `-remoteRead.url` and `-notifier.url` are omitted since we assume only recording rules are used.
|
||||
|
||||
See also [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html) and [downsampling](https://docs.victoriametrics.com/#downsampling).
|
||||
See also [downsampling docs](https://docs.victoriametrics.com/#downsampling).
|
||||
|
||||
#### Multiple remote writes
|
||||
|
||||
@@ -552,7 +489,7 @@ we recommend using [vmagent](https://docs.victoriametrics.com/vmagent.html) as f
|
||||
|
||||
In this topology, `vmalert` is configured to persist rule results to `vmagent`. And `vmagent`
|
||||
is configured to fan-out received data to two or more destinations.
|
||||
Using `vmagent` as a proxy provides additional benefits such as
|
||||
Using `vmagent` as a proxy provides additional benefits such as
|
||||
[data persisting when storage is unreachable](https://docs.victoriametrics.com/vmagent.html#replication-and-high-availability),
|
||||
or time series modification via [relabeling](https://docs.victoriametrics.com/vmagent.html#relabeling).
|
||||
|
||||
@@ -567,7 +504,6 @@ or time series modification via [relabeling](https://docs.victoriametrics.com/vm
|
||||
* `http://<vmalert-addr>/vmalert/api/v1/alert?group_id=<group_id>&alert_id=<alert_id>` - get alert status in JSON format.
|
||||
Used as alert source in AlertManager.
|
||||
* `http://<vmalert-addr>/vmalert/alert?group_id=<group_id>&alert_id=<alert_id>` - get alert status in web UI.
|
||||
* `http://<vmalert-addr>/vmalert/rule?group_id=<group_id>&rule_id=<rule_id>` - get rule status in web UI.
|
||||
* `http://<vmalert-addr>/metrics` - application metrics.
|
||||
* `http://<vmalert-addr>/-/reload` - hot configuration reload.
|
||||
|
||||
@@ -676,7 +612,7 @@ There are following non-required `replay` flags:
|
||||
Progress bar may generate a lot of log records, which is not formatted as standard VictoriaMetrics logger.
|
||||
It could break logs parsing by external system and generate additional load on it.
|
||||
|
||||
See full description for these flags in `./vmalert -help`.
|
||||
See full description for these flags in `./vmalert --help`.
|
||||
|
||||
### Limitations
|
||||
|
||||
@@ -687,77 +623,13 @@ See full description for these flags in `./vmalert -help`.
|
||||
## Monitoring
|
||||
|
||||
`vmalert` exports various metrics in Prometheus exposition format at `http://vmalert-host:8880/metrics` page.
|
||||
The default list of alerting rules for these metric can be found [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker).
|
||||
We recommend setting up regular scraping of this page either through `vmagent` or by Prometheus so that the exported
|
||||
metrics may be analyzed later.
|
||||
|
||||
Use the official [Grafana dashboard](https://grafana.com/grafana/dashboards/14950-victoriametrics-vmalert/) for `vmalert` overview.
|
||||
Graphs on this dashboard contain useful hints - hover the `i` icon in the top left corner of each graph in order to read it.
|
||||
Use the official [Grafana dashboard](https://grafana.com/grafana/dashboards/14950) for `vmalert` overview. Graphs on this dashboard contain useful hints - hover the `i` icon at the top left corner of each graph in order to read it.
|
||||
If you have suggestions for improvements or have found a bug - please open an issue on github or add
|
||||
a review to the dashboard.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
vmalert executes configured rules within certain intervals. It is expected that at the moment when rule is executed,
|
||||
the data is already present in configured `-datasource.url`:
|
||||
|
||||
<img alt="vmalert expected evaluation" src="vmalert_ts_normal.gif">
|
||||
|
||||
Usually, troubles start to appear when data in `-datasource.url` is delayed or absent. In such cases, evaluations
|
||||
may get empty response from datasource and produce empty recording rules or reset alerts state:
|
||||
|
||||
<img alt="vmalert evaluation when data is delayed" src="vmalert_ts_data_delay.gif">
|
||||
|
||||
By default, recently written samples to VictoriaMetrics aren't visible for queries for up to 30s.
|
||||
This behavior is controlled by `-search.latencyOffset` command-line flag and the `latency_offset` query ag at `vmselect`.
|
||||
Usually, this results into a 30s shift for recording rules results.
|
||||
Note that too small value passed to `-search.latencyOffset` or to `latency_offest` query arg may lead to incomplete query results.
|
||||
|
||||
Try the following recommendations in such cases:
|
||||
|
||||
* Always configure group's `evaluationInterval` to be bigger or equal to `scrape_interval` at which metrics
|
||||
are delivered to the datasource;
|
||||
* If you know in advance, that data in datasource is delayed - try changing vmalert's `-datasource.lookback`
|
||||
command-line flag to add a time shift for evaluations;
|
||||
* If time intervals between datapoints in datasource are irregular or `>=5min` - try changing vmalert's
|
||||
`-datasource.queryStep` command-line flag to specify how far search query can lookback for the recent datapoint.
|
||||
The recommendation is to have the step at least two times bigger than `scrape_interval`, since
|
||||
there are no guarantees that scrape will not fail.
|
||||
|
||||
Sometimes, it is not clear why some specific alert fired or didn't fire. It is very important to remember, that
|
||||
alerts with `for: 0` fire immediately when their expression becomes true. And alerts with `for > 0` will fire only
|
||||
after multiple consecutive evaluations, and at each evaluation their expression must be true. If at least one evaluation
|
||||
becomes false, then alert's state resets to the initial state.
|
||||
|
||||
If `-remoteWrite.url` command-line flag is configured, vmalert will persist alert's state in form of time series
|
||||
`ALERTS` and `ALERTS_FOR_STATE` to the specified destination. Such time series can be then queried via
|
||||
[vmui](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#vmui) or Grafana to track how alerts state
|
||||
changed in time.
|
||||
|
||||
vmalert stores last `-rule.updateEntriesLimit` (or `update_entries_limit` [per-rule config](https://docs.victoriametrics.com/vmalert.html#alerting-rules))
|
||||
state updates for each rule. To check updates, click on `Details` link next to rule's name on `/vmalert/groups` page
|
||||
and check the `Last updates` section:
|
||||
|
||||
<img alt="vmalert state" src="vmalert_state.png">
|
||||
|
||||
Rows in the section represent ordered rule evaluations and their results. The column `curl` contains an example of
|
||||
HTTP request sent by vmalert to the `-datasource.url` during evaluation. If specific state shows that there were
|
||||
no samples returned and curl command returns data - then it is very likely there was no data in datasource on the
|
||||
moment when rule was evaluated.
|
||||
|
||||
vmalert allows configuring more detailed logging for specific alerting rule. Just set `debug: true` in rule's configuration
|
||||
and vmalert will start printing additional log messages:
|
||||
```terminal
|
||||
2022-09-15T13:35:41.155Z DEBUG rule "TestGroup":"Conns" (2601299393013563564) at 2022-09-15T15:35:41+02:00: query returned 0 samples (elapsed: 5.896041ms)
|
||||
2022-09-15T13:35:56.149Z DEBUG datasource request: executing POST request with params "denyPartialResponse=true&query=sum%28vm_tcplistener_conns%7Binstance%3D%22localhost%3A8429%22%7D%29+by%28instance%29+%3E+0&step=15s&time=1663248945"
|
||||
2022-09-15T13:35:56.178Z DEBUG rule "TestGroup":"Conns" (2601299393013563564) at 2022-09-15T15:35:56+02:00: query returned 1 samples (elapsed: 28.368208ms)
|
||||
2022-09-15T13:35:56.178Z DEBUG datasource request: executing POST request with params "denyPartialResponse=true&query=sum%28vm_tcplistener_conns%7Binstance%3D%22localhost%3A8429%22%7D%29&step=15s&time=1663248945"
|
||||
2022-09-15T13:35:56.179Z DEBUG rule "TestGroup":"Conns" (2601299393013563564) at 2022-09-15T15:35:56+02:00: alert 10705778000901301787 {alertgroup="TestGroup",alertname="Conns",cluster="east-1",instance="localhost:8429",replica="a"} created in state PENDING
|
||||
...
|
||||
2022-09-15T13:36:56.153Z DEBUG rule "TestGroup":"Conns" (2601299393013563564) at 2022-09-15T15:36:56+02:00: alert 10705778000901301787 {alertgroup="TestGroup",alertname="Conns",cluster="east-1",instance="localhost:8429",replica="a"} PENDING => FIRING: 1m0s since becoming active at 2022-09-15 15:35:56.126006 +0200 CEST m=+39.384575417
|
||||
```
|
||||
|
||||
|
||||
## Profiling
|
||||
|
||||
`vmalert` provides handlers for collecting the following [Go profiles](https://blog.golang.org/profiling-go-programs):
|
||||
@@ -798,7 +670,7 @@ The shortlist of configuration flags is the following:
|
||||
{% raw %}
|
||||
```
|
||||
-clusterMode
|
||||
If clusterMode is enabled, then vmalert automatically adds the tenant specified in config groups to -datasource.url, -remoteWrite.url and -remoteRead.url. See https://docs.victoriametrics.com/vmalert.html#multitenancy . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
If clusterMode is enabled, then vmalert automatically adds the tenant specified in config groups to -datasource.url, -remoteWrite.url and -remoteRead.url. See https://docs.victoriametrics.com/vmalert.html#multitenancy
|
||||
-configCheckInterval duration
|
||||
Interval for checking for changes in '-rule' or '-notifier.config' files. By default the checking is disabled. Send SIGHUP signal in order to force config check for changes.
|
||||
-datasource.appendTypePrefix
|
||||
@@ -832,7 +704,7 @@ The shortlist of configuration flags is the following:
|
||||
-datasource.oauth2.tokenUrl string
|
||||
Optional OAuth2 tokenURL to use for -datasource.url.
|
||||
-datasource.queryStep duration
|
||||
How far a value can fallback to when evaluating queries. For example, if -datasource.queryStep=15s then param "step" with value "15s" will be added to every query. If set to 0, rule's evaluation interval will be used instead. (default 5m0s)
|
||||
queryStep defines how far a value can fallback to when evaluating queries. For example, if datasource.queryStep=15s then param "step" with value "15s" will be added to every query.If queryStep isn't specified, rule's evaluationInterval will be used instead.
|
||||
-datasource.queryTimeAlignment
|
||||
Whether to align "time" parameter with evaluation interval.Alignment supposed to produce deterministic results despite of number of vmalert replicas or time they were started. See more details here https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1257 (default true)
|
||||
-datasource.roundDigits int
|
||||
@@ -850,14 +722,14 @@ The shortlist of configuration flags is the following:
|
||||
-datasource.tlsServerName string
|
||||
Optional TLS server name to use for connections to -datasource.url. By default, the server name from -datasource.url is used
|
||||
-datasource.url string
|
||||
Datasource compatible with Prometheus HTTP API. It can be single node VictoriaMetrics or vmselect URL. Required parameter. E.g. http://127.0.0.1:8428 . See also -remoteRead.disablePathAppend and -datasource.showURL
|
||||
Datasource compatible with Prometheus HTTP API. It can be single node VictoriaMetrics or vmselect URL. Required parameter. E.g. http://127.0.0.1:8428 . See also '-datasource.disablePathAppend', '-datasource.showURL'.
|
||||
-defaultTenant.graphite string
|
||||
Default tenant for Graphite alerting groups. See https://docs.victoriametrics.com/vmalert.html#multitenancy .This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
Default tenant for Graphite alerting groups. See https://docs.victoriametrics.com/vmalert.html#multitenancy
|
||||
-defaultTenant.prometheus string
|
||||
Default tenant for Prometheus alerting groups. See https://docs.victoriametrics.com/vmalert.html#multitenancy . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
Default tenant for Prometheus alerting groups. See https://docs.victoriametrics.com/vmalert.html#multitenancy
|
||||
-disableAlertgroupLabel
|
||||
Whether to disable adding group's Name as label to generated alerts and time series.
|
||||
-dryRun
|
||||
-dryRun -rule
|
||||
Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default only IPv4 TCP and UDP is used
|
||||
@@ -866,11 +738,12 @@ The shortlist of configuration flags is the following:
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-eula
|
||||
By specifying this flag, you confirm that you have an enterprise license and accept the EULA https://victoriametrics.com/assets/VM_EULA.pdf . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
By specifying this flag, you confirm that you have an enterprise license and accept the EULA https://victoriametrics.com/assets/VM_EULA.pdf
|
||||
-evaluationInterval duration
|
||||
How often to evaluate the rules (default 1m0s)
|
||||
-external.alert.source string
|
||||
External Alert Source allows to override the Source link for alerts sent to AlertManager for cases where you want to build a custom link to Grafana, Prometheus or any other service. Supports templating - see https://docs.victoriametrics.com/vmalert.html#templating . For example, link to Grafana: -external.alert.source='explore?orgId=1&left=["now-1h","now","VictoriaMetrics",{"expr":{{$expr|jsonEscape|queryEscape}} },{"mode":"Metrics"},{"ui":[true,true,true,"none"]}]' . If empty 'vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}' is used.
|
||||
External Alert Source allows to override the Source link for alerts sent to AlertManager for cases where you want to build a custom link to Grafana, Prometheus or any other service.
|
||||
Supports templating. For example, link to Grafana: 'explore?orgId=1&left=[\"now-1h\",\"now\",\"VictoriaMetrics\",{\"expr\": \"{{$expr|quotesEscape|crlfEscape|queryEscape}}\"},{\"mode\":\"Metrics\"},{\"ui\":[true,true,true,\"none\"]}]'. (default "{{.ExternalURL}}/vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}")
|
||||
-external.label array
|
||||
Optional label in the form 'Name=value' to add to all generated recording rules and alerts. Pass multiple -label flags in order to add multiple label sets.
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
@@ -904,8 +777,6 @@ The shortlist of configuration flags is the following:
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
@@ -916,7 +787,7 @@ The shortlist of configuration flags is the following:
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache resulting in higher disk IO usage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
@@ -971,20 +842,20 @@ The shortlist of configuration flags is the following:
|
||||
Optional TLS server name to use for connections to -notifier.url. By default the server name from -notifier.url is used
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-notifier.url array
|
||||
Prometheus Alertmanager URL, e.g. http://127.0.0.1:9093. List all Alertmanager URLs if it runs in the cluster mode to ensure high availability.
|
||||
Prometheus alertmanager URL, e.g. http://127.0.0.1:9093
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-pprofAuthKey string
|
||||
Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-promscrape.consul.waitTime duration
|
||||
Wait time used by Consul service discovery. Default value is used if not set
|
||||
-promscrape.consulSDCheckInterval duration
|
||||
Interval for checking for changes in Consul. This works only if consul_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#consul_sd_configs for details (default 30s)
|
||||
Interval for checking for changes in Consul. This works only if consul_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config for details (default 30s)
|
||||
-promscrape.discovery.concurrency int
|
||||
The maximum number of concurrent requests to Prometheus autodiscovery API (Consul, Kubernetes, etc.) (default 100)
|
||||
-promscrape.discovery.concurrentWaitTime duration
|
||||
The maximum duration for waiting to perform API requests if more than -promscrape.discovery.concurrency requests are simultaneously performed (default 1m0s)
|
||||
-promscrape.dnsSDCheckInterval duration
|
||||
Interval for checking for changes in dns. This works only if dns_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#dns_sd_configs for details (default 30s)
|
||||
Interval for checking for changes in dns. This works only if dns_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config for details (default 30s)
|
||||
-pushmetrics.extraLabel array
|
||||
Optional labels to add to metrics pushed to -pushmetrics.url . For example, -pushmetrics.extraLabel='instance="foo"' adds instance="foo" label to all the metrics pushed to -pushmetrics.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
@@ -1067,8 +938,6 @@ The shortlist of configuration flags is the following:
|
||||
Optional OAuth2 scopes to use for -notifier.url. Scopes must be delimited by ';'.
|
||||
-remoteWrite.oauth2.tokenUrl string
|
||||
Optional OAuth2 tokenURL to use for -notifier.url.
|
||||
-remoteWrite.sendTimeout duration
|
||||
Timeout for sending data to the configured -remoteWrite.url. (default 30s)
|
||||
-remoteWrite.showURL
|
||||
Whether to show -remoteWrite.url in the exported metrics. It is hidden by default, since it can contain sensitive info such as auth key
|
||||
-remoteWrite.tlsCAFile string
|
||||
@@ -1118,8 +987,6 @@ The shortlist of configuration flags is the following:
|
||||
-rule.templates="dir/*.tpl" -rule.templates="/*.tpl". Relative path to all .tpl files in "dir" folder,
|
||||
absolute path to all .tpl files in root.
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-rule.updateEntriesLimit int
|
||||
Defines the max number of rule's state updates stored in-memory. Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overriden per rule via update_entries_limit param. (default 20)
|
||||
-rule.validateExpressions
|
||||
Whether to validate rules expressions via MetricsQL engine (default true)
|
||||
-rule.validateTemplates
|
||||
@@ -1133,8 +1000,6 @@ The shortlist of configuration flags is the following:
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-tlsKeyFile string
|
||||
Path to file with TLS key if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated
|
||||
-tlsMinVersion string
|
||||
Optional minimum TLS version to use for incoming requests over HTTPS if -tls is set. Supported values: TLS10, TLS11, TLS12, TLS13
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
```
|
||||
@@ -1185,7 +1050,7 @@ and [DNS](https://prometheus.io/docs/prometheus/latest/configuration/configurati
|
||||
For example:
|
||||
|
||||
```
|
||||
static_configs:
|
||||
static_configs:
|
||||
- targets:
|
||||
- localhost:9093
|
||||
- localhost:9095
|
||||
@@ -1194,7 +1059,7 @@ consul_sd_configs:
|
||||
- server: localhost:8500
|
||||
services:
|
||||
- alertmanager
|
||||
|
||||
|
||||
dns_sd_configs:
|
||||
- names:
|
||||
- my.domain.com
|
||||
@@ -1203,8 +1068,6 @@ dns_sd_configs:
|
||||
```
|
||||
|
||||
The list of configured or discovered Notifiers can be explored via [UI](#Web).
|
||||
If Alertmanager runs in cluster mode then all its URLs needs to be available during discovery
|
||||
to ensure [high availability](https://github.com/prometheus/alertmanager#high-availability).
|
||||
|
||||
The configuration file [specification](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/notifier/config.go)
|
||||
is the following:
|
||||
@@ -1224,7 +1087,7 @@ is the following:
|
||||
# password and password_file are mutually exclusive.
|
||||
basic_auth:
|
||||
[ username: <string> ]
|
||||
[ password: <string> ]
|
||||
[ password: <secret> ]
|
||||
[ password_file: <string> ]
|
||||
|
||||
# Optional `Authorization` header configuration.
|
||||
@@ -1243,41 +1106,10 @@ authorization:
|
||||
tls_config:
|
||||
[ <tls_config> ]
|
||||
|
||||
# Configures Bearer authentication token via string
|
||||
bearer_token: <string>
|
||||
# or by passing path to the file with token.
|
||||
bearer_token_file: <string>
|
||||
|
||||
# Configures OAuth 2.0 authentication
|
||||
# see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#oauth2
|
||||
oauth2:
|
||||
[ <oauth2_config> ]
|
||||
|
||||
# Optional list of HTTP headers in form `header-name: value`
|
||||
# applied for all requests to notifiers
|
||||
# For example:
|
||||
# headers:
|
||||
# - "CustomHeader: foo"
|
||||
# - "CustomHeader2: bar"
|
||||
headers:
|
||||
[ <string>, ...]
|
||||
|
||||
# List of labeled statically configured Notifiers.
|
||||
#
|
||||
# Each list of targets may be additionally instructed with
|
||||
# authorization params. Target's authorization params will
|
||||
# inherit params from global authorization params if there
|
||||
# are no conflicts.
|
||||
static_configs:
|
||||
[ - targets: ]
|
||||
[ - '<host>' ]
|
||||
[ oauth2 ]
|
||||
[ basic_auth ]
|
||||
[ authorization ]
|
||||
[ tls_config ]
|
||||
[ bearer_token ]
|
||||
[ bearer_token_file ]
|
||||
[ headers ]
|
||||
targets:
|
||||
[ - '<host>' ]
|
||||
|
||||
# List of Consul service discovery configurations.
|
||||
# See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
|
||||
@@ -1338,7 +1170,7 @@ spec:
|
||||
|
||||
### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.18.
|
||||
2. Run `make vmalert` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmalert` binary and puts it into the `bin` folder.
|
||||
|
||||
@@ -1354,7 +1186,7 @@ ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://b
|
||||
|
||||
### Development ARM build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.18.
|
||||
2. Run `make vmalert-linux-arm` or `make vmalert-linux-arm64` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmalert-linux-arm` or `vmalert-linux-arm64` binary respectively and puts it into the `bin` folder.
|
||||
|
||||
|
||||
@@ -6,7 +6,6 @@ import (
|
||||
"hash/fnv"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -31,17 +30,24 @@ type AlertingRule struct {
|
||||
GroupID uint64
|
||||
GroupName string
|
||||
EvalInterval time.Duration
|
||||
Debug bool
|
||||
|
||||
q datasource.Querier
|
||||
|
||||
alertsMu sync.RWMutex
|
||||
// guard status fields
|
||||
mu sync.RWMutex
|
||||
// stores list of active alerts
|
||||
alerts map[uint64]*notifier.Alert
|
||||
|
||||
// state stores recent state changes
|
||||
// during evaluations
|
||||
state *ruleState
|
||||
// stores last moment of time Exec was called
|
||||
lastExecTime time.Time
|
||||
// stores the duration of the last Exec call
|
||||
lastExecDuration time.Duration
|
||||
// stores last error that happened in Exec func
|
||||
// resets on every successful Exec
|
||||
// may be used as Health state
|
||||
lastExecError error
|
||||
// stores the number of samples returned during
|
||||
// the last evaluation
|
||||
lastExecSamples int
|
||||
|
||||
metrics *alertingRuleMetrics
|
||||
}
|
||||
@@ -65,29 +71,21 @@ func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
|
||||
GroupID: group.ID(),
|
||||
GroupName: group.Name,
|
||||
EvalInterval: group.Interval,
|
||||
Debug: cfg.Debug,
|
||||
q: qb.BuildWithParams(datasource.QuerierParams{
|
||||
DataSourceType: group.Type.String(),
|
||||
EvaluationInterval: group.Interval,
|
||||
QueryParams: group.Params,
|
||||
Headers: group.Headers,
|
||||
Debug: cfg.Debug,
|
||||
}),
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
metrics: &alertingRuleMetrics{},
|
||||
}
|
||||
|
||||
if cfg.UpdateEntriesLimit != nil {
|
||||
ar.state = newRuleState(*cfg.UpdateEntriesLimit)
|
||||
} else {
|
||||
ar.state = newRuleState(*ruleUpdateEntriesLimit)
|
||||
}
|
||||
|
||||
labels := fmt.Sprintf(`alertname=%q, group=%q, id="%d"`, ar.Name, group.Name, ar.ID())
|
||||
ar.metrics.pending = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerts_pending{%s}`, labels),
|
||||
func() float64 {
|
||||
ar.alertsMu.RLock()
|
||||
defer ar.alertsMu.RUnlock()
|
||||
ar.mu.RLock()
|
||||
defer ar.mu.RUnlock()
|
||||
var num int
|
||||
for _, a := range ar.alerts {
|
||||
if a.State == notifier.StatePending {
|
||||
@@ -98,8 +96,8 @@ func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
|
||||
})
|
||||
ar.metrics.active = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerts_firing{%s}`, labels),
|
||||
func() float64 {
|
||||
ar.alertsMu.RLock()
|
||||
defer ar.alertsMu.RUnlock()
|
||||
ar.mu.RLock()
|
||||
defer ar.mu.RUnlock()
|
||||
var num int
|
||||
for _, a := range ar.alerts {
|
||||
if a.State == notifier.StateFiring {
|
||||
@@ -110,16 +108,18 @@ func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
|
||||
})
|
||||
ar.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_error{%s}`, labels),
|
||||
func() float64 {
|
||||
e := ar.state.getLast()
|
||||
if e.err == nil {
|
||||
ar.mu.RLock()
|
||||
defer ar.mu.RUnlock()
|
||||
if ar.lastExecError == nil {
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
})
|
||||
ar.metrics.samples = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_last_evaluation_samples{%s}`, labels),
|
||||
func() float64 {
|
||||
e := ar.state.getLast()
|
||||
return float64(e.samples)
|
||||
ar.mu.RLock()
|
||||
defer ar.mu.RUnlock()
|
||||
return float64(ar.lastExecSamples)
|
||||
})
|
||||
return ar
|
||||
}
|
||||
@@ -143,42 +143,12 @@ func (ar *AlertingRule) ID() uint64 {
|
||||
return ar.RuleID
|
||||
}
|
||||
|
||||
func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string, args ...interface{}) {
|
||||
if !ar.Debug {
|
||||
return
|
||||
}
|
||||
prefix := fmt.Sprintf("DEBUG rule %q:%q (%d) at %v: ",
|
||||
ar.GroupName, ar.Name, ar.RuleID, at.Format(time.RFC3339))
|
||||
|
||||
if a != nil {
|
||||
labelKeys := make([]string, len(a.Labels))
|
||||
var i int
|
||||
for k := range a.Labels {
|
||||
labelKeys[i] = k
|
||||
i++
|
||||
}
|
||||
sort.Strings(labelKeys)
|
||||
labels := make([]string, len(labelKeys))
|
||||
for i, l := range labelKeys {
|
||||
labels[i] = fmt.Sprintf("%s=%q", l, a.Labels[l])
|
||||
}
|
||||
labelsStr := strings.Join(labels, ",")
|
||||
prefix += fmt.Sprintf("alert %d {%s} ", a.ID, labelsStr)
|
||||
}
|
||||
msg := fmt.Sprintf(format, args...)
|
||||
logger.Infof("%s", prefix+msg)
|
||||
}
|
||||
|
||||
type labelSet struct {
|
||||
// origin labels extracted from received time series
|
||||
// plus extra labels (group labels, service labels like alertNameLabel).
|
||||
// in case of conflicts, origin labels from time series preferred.
|
||||
// used for templating annotations
|
||||
// origin labels from series
|
||||
// used for templating
|
||||
origin map[string]string
|
||||
// processed labels includes origin labels
|
||||
// plus extra labels (group labels, service labels like alertNameLabel).
|
||||
// in case of conflicts, extra labels are preferred.
|
||||
// used as labels attached to notifier.Alert and ALERTS series written to remote storage.
|
||||
// processed labels with additional data
|
||||
// used as Alert labels
|
||||
processed map[string]string
|
||||
}
|
||||
|
||||
@@ -186,7 +156,7 @@ type labelSet struct {
|
||||
// to labelSet which contains original and processed labels.
|
||||
func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*labelSet, error) {
|
||||
ls := &labelSet{
|
||||
origin: make(map[string]string),
|
||||
origin: make(map[string]string, len(m.Labels)),
|
||||
processed: make(map[string]string),
|
||||
}
|
||||
for _, l := range m.Labels {
|
||||
@@ -208,23 +178,14 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
|
||||
}
|
||||
for k, v := range extraLabels {
|
||||
ls.processed[k] = v
|
||||
if _, ok := ls.origin[k]; !ok {
|
||||
ls.origin[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
// set additional labels to identify group and rule name
|
||||
if ar.Name != "" {
|
||||
ls.processed[alertNameLabel] = ar.Name
|
||||
if _, ok := ls.origin[alertNameLabel]; !ok {
|
||||
ls.origin[alertNameLabel] = ar.Name
|
||||
}
|
||||
}
|
||||
if !*disableAlertGroupLabel && ar.GroupName != "" {
|
||||
ls.processed[alertGroupNameLabel] = ar.GroupName
|
||||
if _, ok := ls.origin[alertGroupNameLabel]; !ok {
|
||||
ls.origin[alertGroupNameLabel] = ar.GroupName
|
||||
}
|
||||
}
|
||||
return ls, nil
|
||||
}
|
||||
@@ -282,55 +243,39 @@ const resolvedRetention = 15 * time.Minute
|
||||
// Based on the Querier results AlertingRule maintains notifier.Alerts
|
||||
func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
|
||||
start := time.Now()
|
||||
qMetrics, req, err := ar.q.Query(ctx, ar.Expr, ts)
|
||||
curState := ruleStateEntry{
|
||||
time: start,
|
||||
at: ts,
|
||||
duration: time.Since(start),
|
||||
samples: len(qMetrics),
|
||||
err: err,
|
||||
curl: requestToCurl(req),
|
||||
}
|
||||
|
||||
defer func() {
|
||||
ar.state.add(curState)
|
||||
}()
|
||||
|
||||
ar.alertsMu.Lock()
|
||||
defer ar.alertsMu.Unlock()
|
||||
qMetrics, err := ar.q.Query(ctx, ar.Expr, ts)
|
||||
ar.mu.Lock()
|
||||
defer ar.mu.Unlock()
|
||||
|
||||
ar.lastExecTime = start
|
||||
ar.lastExecDuration = time.Since(start)
|
||||
ar.lastExecError = err
|
||||
ar.lastExecSamples = len(qMetrics)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err)
|
||||
}
|
||||
|
||||
ar.logDebugf(ts, nil, "query returned %d samples (elapsed: %s)", curState.samples, curState.duration)
|
||||
|
||||
for h, a := range ar.alerts {
|
||||
// cleanup inactive alerts from previous Exec
|
||||
if a.State == notifier.StateInactive && ts.Sub(a.ResolvedAt) > resolvedRetention {
|
||||
ar.logDebugf(ts, a, "deleted as inactive")
|
||||
delete(ar.alerts, h)
|
||||
}
|
||||
}
|
||||
|
||||
qFn := func(query string) ([]datasource.Metric, error) {
|
||||
res, _, err := ar.q.Query(ctx, query, ts)
|
||||
return res, err
|
||||
}
|
||||
qFn := func(query string) ([]datasource.Metric, error) { return ar.q.Query(ctx, query, ts) }
|
||||
updated := make(map[uint64]struct{})
|
||||
// update list of active alerts
|
||||
for _, m := range qMetrics {
|
||||
ls, err := ar.toLabels(m, qFn)
|
||||
if err != nil {
|
||||
curState.err = fmt.Errorf("failed to expand labels: %s", err)
|
||||
return nil, curState.err
|
||||
return nil, fmt.Errorf("failed to expand labels: %s", err)
|
||||
}
|
||||
h := hash(ls.processed)
|
||||
if _, ok := updated[h]; ok {
|
||||
// duplicate may be caused by extra labels
|
||||
// conflicting with the metric labels
|
||||
curState.err = fmt.Errorf("labels %v: %w", ls.processed, errDuplicate)
|
||||
return nil, curState.err
|
||||
ar.lastExecError = fmt.Errorf("labels %v: %w", ls.processed, errDuplicate)
|
||||
return nil, ar.lastExecError
|
||||
}
|
||||
updated[h] = struct{}{}
|
||||
if a, ok := ar.alerts[h]; ok {
|
||||
@@ -340,26 +285,28 @@ func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
// back to notifier.StatePending
|
||||
a.State = notifier.StatePending
|
||||
a.ActiveAt = ts
|
||||
ar.logDebugf(ts, a, "INACTIVE => PENDING")
|
||||
}
|
||||
a.Value = m.Values[0]
|
||||
// re-exec template since Value or query can be used in annotations
|
||||
a.Annotations, err = a.ExecTemplate(qFn, ls.origin, ar.Annotations)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
if a.Value != m.Values[0] {
|
||||
// update Value field with latest value
|
||||
a.Value = m.Values[0]
|
||||
// and re-exec template since Value can be used
|
||||
// in annotations
|
||||
a.Annotations, err = a.ExecTemplate(qFn, ls.origin, ar.Annotations)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
a, err := ar.newAlert(m, ls, start, qFn)
|
||||
a, err := ar.newAlert(m, ls, ar.lastExecTime, qFn)
|
||||
if err != nil {
|
||||
curState.err = fmt.Errorf("failed to create alert: %w", err)
|
||||
return nil, curState.err
|
||||
ar.lastExecError = err
|
||||
return nil, fmt.Errorf("failed to create alert: %w", err)
|
||||
}
|
||||
a.ID = h
|
||||
a.State = notifier.StatePending
|
||||
a.ActiveAt = ts
|
||||
ar.alerts[h] = a
|
||||
ar.logDebugf(ts, a, "created in state PENDING")
|
||||
}
|
||||
var numActivePending int
|
||||
for h, a := range ar.alerts {
|
||||
@@ -370,13 +317,11 @@ func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
// alert was in Pending state - it is not
|
||||
// active anymore
|
||||
delete(ar.alerts, h)
|
||||
ar.logDebugf(ts, a, "PENDING => DELETED: is absent in current evaluation round")
|
||||
continue
|
||||
}
|
||||
if a.State == notifier.StateFiring {
|
||||
a.State = notifier.StateInactive
|
||||
a.ResolvedAt = ts
|
||||
ar.logDebugf(ts, a, "FIRING => INACTIVE: is absent in current evaluation round")
|
||||
}
|
||||
continue
|
||||
}
|
||||
@@ -385,13 +330,11 @@ func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
a.State = notifier.StateFiring
|
||||
a.Start = ts
|
||||
alertsFired.Inc()
|
||||
ar.logDebugf(ts, a, "PENDING => FIRING: %s since becoming active at %v", ts.Sub(a.ActiveAt), a.ActiveAt)
|
||||
}
|
||||
}
|
||||
if limit > 0 && numActivePending > limit {
|
||||
ar.alerts = map[uint64]*notifier.Alert{}
|
||||
curState.err = fmt.Errorf("exec exceeded limit of %d with %d alerts", limit, numActivePending)
|
||||
return nil, curState.err
|
||||
return nil, fmt.Errorf("exec exceeded limit of %d with %d alerts", limit, numActivePending)
|
||||
}
|
||||
return ar.toTimeSeries(ts.Unix()), nil
|
||||
}
|
||||
@@ -461,7 +404,6 @@ func (ar *AlertingRule) newAlert(m datasource.Metric, ls *labelSet, start time.T
|
||||
Value: m.Values[0],
|
||||
ActiveAt: start,
|
||||
Expr: ar.Expr,
|
||||
For: ar.For,
|
||||
}
|
||||
a.Annotations, err = a.ExecTemplate(qFn, ls.origin, ar.Annotations)
|
||||
return a, err
|
||||
@@ -469,8 +411,8 @@ func (ar *AlertingRule) newAlert(m datasource.Metric, ls *labelSet, start time.T
|
||||
|
||||
// AlertAPI generates APIAlert object from alert by its id(hash)
|
||||
func (ar *AlertingRule) AlertAPI(id uint64) *APIAlert {
|
||||
ar.alertsMu.RLock()
|
||||
defer ar.alertsMu.RUnlock()
|
||||
ar.mu.RLock()
|
||||
defer ar.mu.RUnlock()
|
||||
a, ok := ar.alerts[id]
|
||||
if !ok {
|
||||
return nil
|
||||
@@ -478,10 +420,9 @@ func (ar *AlertingRule) AlertAPI(id uint64) *APIAlert {
|
||||
return ar.newAlertAPI(*a)
|
||||
}
|
||||
|
||||
// ToAPI returns Rule representation in form of APIRule
|
||||
// Isn't thread-safe. Call must be protected by AlertingRule mutex.
|
||||
// ToAPI returns Rule representation in form
|
||||
// of APIRule
|
||||
func (ar *AlertingRule) ToAPI() APIRule {
|
||||
lastState := ar.state.getLast()
|
||||
r := APIRule{
|
||||
Type: "alerting",
|
||||
DatasourceType: ar.Type.String(),
|
||||
@@ -490,21 +431,19 @@ func (ar *AlertingRule) ToAPI() APIRule {
|
||||
Duration: ar.For.Seconds(),
|
||||
Labels: ar.Labels,
|
||||
Annotations: ar.Annotations,
|
||||
LastEvaluation: lastState.time,
|
||||
EvaluationTime: lastState.duration.Seconds(),
|
||||
LastEvaluation: ar.lastExecTime,
|
||||
EvaluationTime: ar.lastExecDuration.Seconds(),
|
||||
Health: "ok",
|
||||
State: "inactive",
|
||||
Alerts: ar.AlertsToAPI(),
|
||||
LastSamples: lastState.samples,
|
||||
MaxUpdates: ar.state.size(),
|
||||
Updates: ar.state.getAll(),
|
||||
LastSamples: ar.lastExecSamples,
|
||||
|
||||
// encode as strings to avoid rounding in JSON
|
||||
ID: fmt.Sprintf("%d", ar.ID()),
|
||||
GroupID: fmt.Sprintf("%d", ar.GroupID),
|
||||
}
|
||||
if lastState.err != nil {
|
||||
r.LastError = lastState.err.Error()
|
||||
if ar.lastExecError != nil {
|
||||
r.LastError = ar.lastExecError.Error()
|
||||
r.Health = "err"
|
||||
}
|
||||
// satisfy APIRule.State logic
|
||||
@@ -524,14 +463,14 @@ func (ar *AlertingRule) ToAPI() APIRule {
|
||||
// AlertsToAPI generates list of APIAlert objects from existing alerts
|
||||
func (ar *AlertingRule) AlertsToAPI() []*APIAlert {
|
||||
var alerts []*APIAlert
|
||||
ar.alertsMu.RLock()
|
||||
ar.mu.RLock()
|
||||
for _, a := range ar.alerts {
|
||||
if a.State == notifier.StateInactive {
|
||||
continue
|
||||
}
|
||||
alerts = append(alerts, ar.newAlertAPI(*a))
|
||||
}
|
||||
ar.alertsMu.RUnlock()
|
||||
ar.mu.RUnlock()
|
||||
return alerts
|
||||
}
|
||||
|
||||
@@ -614,10 +553,7 @@ func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, lookb
|
||||
}
|
||||
|
||||
ts := time.Now()
|
||||
qFn := func(query string) ([]datasource.Metric, error) {
|
||||
res, _, err := ar.q.Query(ctx, query, ts)
|
||||
return res, err
|
||||
}
|
||||
qFn := func(query string) ([]datasource.Metric, error) { return ar.q.Query(ctx, query, ts) }
|
||||
|
||||
// account for external labels in filter
|
||||
var labelsFilter string
|
||||
@@ -627,7 +563,7 @@ func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, lookb
|
||||
|
||||
expr := fmt.Sprintf("last_over_time(%s{alertname=%q%s}[%ds])",
|
||||
alertForStateMetricName, ar.Name, labelsFilter, int(lookback.Seconds()))
|
||||
qMetrics, _, err := q.Query(ctx, expr, ts)
|
||||
qMetrics, err := q.Query(ctx, expr, ts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -700,25 +700,14 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
expAlerts map[uint64]*notifier.Alert
|
||||
}{
|
||||
{
|
||||
&AlertingRule{
|
||||
Name: "common",
|
||||
Labels: map[string]string{
|
||||
"region": "east",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `{{ $labels.alertname }}: Too high connection number for "{{ $labels.instance }}"`,
|
||||
},
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
},
|
||||
newTestRuleWithLabels("common", "region", "east"),
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, 1, "instance", "foo"),
|
||||
metricWithValueAndLabels(t, 1, "instance", "bar"),
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "common", "region": "east", "instance": "foo"}): {
|
||||
Annotations: map[string]string{
|
||||
"summary": `common: Too high connection number for "foo"`,
|
||||
},
|
||||
Annotations: map[string]string{},
|
||||
Labels: map[string]string{
|
||||
alertNameLabel: "common",
|
||||
"region": "east",
|
||||
@@ -726,9 +715,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
},
|
||||
},
|
||||
hash(map[string]string{alertNameLabel: "common", "region": "east", "instance": "bar"}): {
|
||||
Annotations: map[string]string{
|
||||
"summary": `common: Too high connection number for "bar"`,
|
||||
},
|
||||
Annotations: map[string]string{},
|
||||
Labels: map[string]string{
|
||||
alertNameLabel: "common",
|
||||
"region": "east",
|
||||
@@ -817,7 +804,6 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
fq := &fakeQuerier{}
|
||||
tc.rule.GroupID = fakeGroup.ID()
|
||||
tc.rule.q = fq
|
||||
tc.rule.state = newRuleState(10)
|
||||
fq.add(tc.metrics...)
|
||||
if _, err := tc.rule.Exec(context.TODO(), time.Now(), 0); err != nil {
|
||||
t.Fatalf("unexpected err: %s", err)
|
||||
@@ -929,11 +915,5 @@ func newTestRuleWithLabels(name string, labels ...string) *AlertingRule {
|
||||
}
|
||||
|
||||
func newTestAlertingRule(name string, waitFor time.Duration) *AlertingRule {
|
||||
return &AlertingRule{
|
||||
Name: name,
|
||||
For: waitFor,
|
||||
EvalInterval: waitFor,
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
state: newRuleState(10),
|
||||
}
|
||||
return &AlertingRule{Name: name, alerts: make(map[uint64]*notifier.Alert), For: waitFor, EvalInterval: waitFor}
|
||||
}
|
||||
|
||||
@@ -77,7 +77,7 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
|
||||
ruleName = r.Alert
|
||||
}
|
||||
if _, ok := uniqueRules[r.ID]; ok {
|
||||
return fmt.Errorf("%q is a duplicate within the group %q", r.String(), g.Name)
|
||||
return fmt.Errorf("rule %q duplicate", ruleName)
|
||||
}
|
||||
uniqueRules[r.ID] = struct{}{}
|
||||
if err := r.Validate(); err != nil {
|
||||
@@ -113,10 +113,6 @@ type Rule struct {
|
||||
For *promutils.Duration `yaml:"for,omitempty"`
|
||||
Labels map[string]string `yaml:"labels,omitempty"`
|
||||
Annotations map[string]string `yaml:"annotations,omitempty"`
|
||||
Debug bool `yaml:"debug,omitempty"`
|
||||
// UpdateEntriesLimit defines max number of rule's state updates stored in memory.
|
||||
// Overrides `-rule.updateEntriesLimit`.
|
||||
UpdateEntriesLimit *int `yaml:"update_entries_limit,omitempty"`
|
||||
|
||||
// Catches all undefined fields and must be empty after parsing.
|
||||
XXX map[string]interface{} `yaml:",inline"`
|
||||
@@ -140,32 +136,6 @@ func (r *Rule) Name() string {
|
||||
return r.Alert
|
||||
}
|
||||
|
||||
// String implements Stringer interface
|
||||
func (r *Rule) String() string {
|
||||
ruleType := "recording"
|
||||
if r.Alert != "" {
|
||||
ruleType = "alerting"
|
||||
}
|
||||
b := strings.Builder{}
|
||||
b.WriteString(fmt.Sprintf("%s rule %q", ruleType, r.Name()))
|
||||
b.WriteString(fmt.Sprintf("; expr: %q", r.Expr))
|
||||
|
||||
kv := sortMap(r.Labels)
|
||||
for i := range kv {
|
||||
if i == 0 {
|
||||
b.WriteString("; labels:")
|
||||
}
|
||||
b.WriteString(" ")
|
||||
b.WriteString(kv[i].key)
|
||||
b.WriteString("=")
|
||||
b.WriteString(kv[i].value)
|
||||
if i < len(kv)-1 {
|
||||
b.WriteString(",")
|
||||
}
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// HashRule hashes significant Rule fields into
|
||||
// unique hash that supposed to define Rule uniqueness
|
||||
func HashRule(r Rule) uint64 {
|
||||
@@ -246,12 +216,9 @@ func Parse(pathPatterns []string, validateTplFn ValidateTplFn, validateExpressio
|
||||
func parseFile(path string) ([]Group, error) {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error reading alert rule file %q: %w", path, err)
|
||||
}
|
||||
data, err = envtemplate.ReplaceBytes(data)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot expand environment vars in %q: %w", path, err)
|
||||
return nil, fmt.Errorf("error reading alert rule file: %w", err)
|
||||
}
|
||||
data = envtemplate.Replace(data)
|
||||
g := struct {
|
||||
Groups []Group `yaml:"groups"`
|
||||
// Catches all undefined fields and must be empty after parsing.
|
||||
|
||||
@@ -535,35 +535,6 @@ headers:
|
||||
rules:
|
||||
- alert: foo
|
||||
expr: sum by(job) (up == 1)
|
||||
`)
|
||||
})
|
||||
|
||||
t.Run("`debug` change", func(t *testing.T) {
|
||||
f(t, `
|
||||
name: TestGroup
|
||||
rules:
|
||||
- alert: foo
|
||||
expr: sum by(job) (up == 1)
|
||||
`, `
|
||||
name: TestGroup
|
||||
rules:
|
||||
- alert: foo
|
||||
expr: sum by(job) (up == 1)
|
||||
debug: true
|
||||
`)
|
||||
})
|
||||
t.Run("`update_entries_limit` change", func(t *testing.T) {
|
||||
f(t, `
|
||||
name: TestGroup
|
||||
rules:
|
||||
- alert: foo
|
||||
expr: sum by(job) (up == 1)
|
||||
`, `
|
||||
name: TestGroup
|
||||
rules:
|
||||
- alert: foo
|
||||
expr: sum by(job) (up == 1)
|
||||
update_entries_limit: 33
|
||||
`)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
groups:
|
||||
- name: TestGroup
|
||||
interval: 5s
|
||||
interval: 2s
|
||||
concurrency: 2
|
||||
limit: 1000
|
||||
headers:
|
||||
@@ -9,19 +9,15 @@ groups:
|
||||
denyPartialResponse: ["true"]
|
||||
rules:
|
||||
- alert: Conns
|
||||
expr: vm_tcplistener_conns > 0
|
||||
expr: sum(vm_tcplistener_conns) by(instance) > 1
|
||||
for: 3m
|
||||
debug: true
|
||||
update_entries_limit: 40
|
||||
annotations:
|
||||
labels: "Available labels: {{ $labels }}"
|
||||
summary: Too high connection number for {{ $labels.instance }}
|
||||
summary: Too high connection number for {{$labels.instance}}
|
||||
{{ with printf "sum(vm_tcplistener_conns{instance=%q})" .Labels.instance | query }}
|
||||
{{ . | first | value }}
|
||||
{{ end }}
|
||||
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
||||
- alert: ExampleAlertAlwaysFiring
|
||||
update_entries_limit: -1
|
||||
expr: sum by(job)
|
||||
(up == 1)
|
||||
labels:
|
||||
|
||||
@@ -7,7 +7,6 @@ groups:
|
||||
- alert: Conns
|
||||
expr: filterSeries(sumSeries(host.receiver.interface.cons),'last','>', 500)
|
||||
for: 3m
|
||||
|
||||
annotations:
|
||||
summary: Too high connection number for {{$labels.instance}}
|
||||
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
||||
|
||||
@@ -2,27 +2,18 @@ package datasource
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Querier interface wraps Query and QueryRange methods
|
||||
type Querier interface {
|
||||
// Query executes instant request with the given query at the given ts.
|
||||
// It returns list of Metric in response, the http.Request used for sending query
|
||||
// and error if any. Returned http.Request can't be reused and its body is already read.
|
||||
// Query should stop once ctx is cancelled.
|
||||
Query(ctx context.Context, query string, ts time.Time) ([]Metric, *http.Request, error)
|
||||
// QueryRange executes range request with the given query on the given time range.
|
||||
// It returns list of Metric in response and error if any.
|
||||
// QueryRange should stop once ctx is cancelled.
|
||||
Query(ctx context.Context, query string, ts time.Time) ([]Metric, error)
|
||||
QueryRange(ctx context.Context, query string, from, to time.Time) ([]Metric, error)
|
||||
}
|
||||
|
||||
// QuerierBuilder builds Querier with given params.
|
||||
type QuerierBuilder interface {
|
||||
// BuildWithParams creates a new Querier object with the given params
|
||||
BuildWithParams(params QuerierParams) Querier
|
||||
}
|
||||
|
||||
@@ -32,7 +23,6 @@ type QuerierParams struct {
|
||||
EvaluationInterval time.Duration
|
||||
QueryParams url.Values
|
||||
Headers map[string]string
|
||||
Debug bool
|
||||
}
|
||||
|
||||
// Metric is the basic entity which should be return by datasource
|
||||
@@ -54,19 +44,6 @@ func (m *Metric) SetLabel(key, value string) {
|
||||
m.AddLabel(key, value)
|
||||
}
|
||||
|
||||
// SetLabels sets the given map as Metric labels
|
||||
func (m *Metric) SetLabels(ls map[string]string) {
|
||||
var i int
|
||||
m.Labels = make([]Label, len(ls))
|
||||
for k, v := range ls {
|
||||
m.Labels[i] = Label{
|
||||
Name: k,
|
||||
Value: v,
|
||||
}
|
||||
i++
|
||||
}
|
||||
}
|
||||
|
||||
// AddLabel appends the given label to the label set
|
||||
func (m *Metric) AddLabel(key, value string) {
|
||||
m.Labels = append(m.Labels, Label{Name: key, Value: value})
|
||||
|
||||
@@ -6,7 +6,6 @@ import (
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
@@ -14,7 +13,7 @@ import (
|
||||
|
||||
var (
|
||||
addr = flag.String("datasource.url", "", "Datasource compatible with Prometheus HTTP API. It can be single node VictoriaMetrics or vmselect URL. Required parameter. "+
|
||||
"E.g. http://127.0.0.1:8428 . See also -remoteRead.disablePathAppend and -datasource.showURL")
|
||||
"E.g. http://127.0.0.1:8428 . See also '-datasource.disablePathAppend', '-datasource.showURL'.")
|
||||
appendTypePrefix = flag.Bool("datasource.appendTypePrefix", false, "Whether to add type prefix to -datasource.url based on the query type. Set to true if sending different query types to the vmselect URL.")
|
||||
showDatasourceURL = flag.Bool("datasource.showURL", false, "Whether to show -datasource.url in the exported metrics. "+
|
||||
"It is hidden by default, since it can contain sensitive info such as auth key")
|
||||
@@ -43,9 +42,9 @@ var (
|
||||
oauth2Scopes = flag.String("datasource.oauth2.scopes", "", "Optional OAuth2 scopes to use for -datasource.url. Scopes must be delimited by ';'")
|
||||
|
||||
lookBack = flag.Duration("datasource.lookback", 0, `Lookback defines how far into the past to look when evaluating queries. For example, if the datasource.lookback=5m then param "time" with value now()-5m will be added to every query.`)
|
||||
queryStep = flag.Duration("datasource.queryStep", 5*time.Minute, "How far a value can fallback to when evaluating queries. "+
|
||||
"For example, if -datasource.queryStep=15s then param \"step\" with value \"15s\" will be added to every query. "+
|
||||
"If set to 0, rule's evaluation interval will be used instead.")
|
||||
queryStep = flag.Duration("datasource.queryStep", 0, "queryStep defines how far a value can fallback to when evaluating queries. "+
|
||||
"For example, if datasource.queryStep=15s then param \"step\" with value \"15s\" will be added to every query."+
|
||||
"If queryStep isn't specified, rule's evaluationInterval will be used instead.")
|
||||
queryTimeAlignment = flag.Bool("datasource.queryTimeAlignment", true, `Whether to align "time" parameter with evaluation interval.`+
|
||||
"Alignment supposed to produce deterministic results despite of number of vmalert replicas or time they were started. See more details here https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1257")
|
||||
maxIdleConnections = flag.Int("datasource.maxIdleConnections", 100, `Defines the number of idle (keep-alive connections) to each configured datasource. Consider setting this value equal to the value: groups_total * group.concurrency. Too low a value may result in a high number of sockets in TIME_WAIT state.`)
|
||||
|
||||
@@ -9,7 +9,6 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
)
|
||||
|
||||
@@ -40,10 +39,6 @@ type VMStorage struct {
|
||||
evaluationInterval time.Duration
|
||||
extraParams url.Values
|
||||
extraHeaders []keyValue
|
||||
|
||||
// whether to print additional log messages
|
||||
// for each sent request
|
||||
debug bool
|
||||
}
|
||||
|
||||
type keyValue struct {
|
||||
@@ -69,7 +64,6 @@ func (s *VMStorage) ApplyParams(params QuerierParams) *VMStorage {
|
||||
s.dataSourceType = toDatasourceType(params.DataSourceType)
|
||||
s.evaluationInterval = params.EvaluationInterval
|
||||
s.extraParams = params.QueryParams
|
||||
s.debug = params.Debug
|
||||
if params.Headers != nil {
|
||||
for key, value := range params.Headers {
|
||||
kv := keyValue{key: key, value: value}
|
||||
@@ -98,10 +92,10 @@ func NewVMStorage(baseURL string, authCfg *promauth.Config, lookBack time.Durati
|
||||
}
|
||||
|
||||
// Query executes the given query and returns parsed response
|
||||
func (s *VMStorage) Query(ctx context.Context, query string, ts time.Time) ([]Metric, *http.Request, error) {
|
||||
func (s *VMStorage) Query(ctx context.Context, query string, ts time.Time) ([]Metric, error) {
|
||||
req, err := s.newRequestPOST()
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
switch s.dataSourceType {
|
||||
@@ -110,12 +104,12 @@ func (s *VMStorage) Query(ctx context.Context, query string, ts time.Time) ([]Me
|
||||
case datasourceGraphite:
|
||||
s.setGraphiteReqParams(req, query, ts)
|
||||
default:
|
||||
return nil, nil, fmt.Errorf("engine not found: %q", s.dataSourceType)
|
||||
return nil, fmt.Errorf("engine not found: %q", s.dataSourceType)
|
||||
}
|
||||
|
||||
resp, err := s.do(ctx, req)
|
||||
if err != nil {
|
||||
return nil, req, err
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
_ = resp.Body.Close()
|
||||
@@ -125,8 +119,7 @@ func (s *VMStorage) Query(ctx context.Context, query string, ts time.Time) ([]Me
|
||||
if s.dataSourceType != datasourcePrometheus {
|
||||
parseFn = parseGraphiteResponse
|
||||
}
|
||||
result, err := parseFn(req, resp)
|
||||
return result, req, err
|
||||
return parseFn(req, resp)
|
||||
}
|
||||
|
||||
// QueryRange executes the given query on the given time range.
|
||||
@@ -158,9 +151,6 @@ func (s *VMStorage) QueryRange(ctx context.Context, query string, start, end tim
|
||||
}
|
||||
|
||||
func (s *VMStorage) do(ctx context.Context, req *http.Request) (*http.Response, error) {
|
||||
if s.debug {
|
||||
logger.Infof("DEBUG datasource request: executing %s request with params %q", req.Method, req.URL.RawQuery)
|
||||
}
|
||||
resp, err := s.c.Do(req.WithContext(ctx))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting response from %s: %w", req.URL.Redacted(), err)
|
||||
|
||||
@@ -32,17 +32,19 @@ type promInstant struct {
|
||||
}
|
||||
|
||||
func (r promInstant) metrics() ([]Metric, error) {
|
||||
result := make([]Metric, len(r.Result))
|
||||
var result []Metric
|
||||
for i, res := range r.Result {
|
||||
f, err := strconv.ParseFloat(res.TV[1].(string), 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("metric %v, unable to parse float64 from %s: %w", res, res.TV[1], err)
|
||||
}
|
||||
var m Metric
|
||||
m.SetLabels(res.Labels)
|
||||
for k, v := range r.Result[i].Labels {
|
||||
m.AddLabel(k, v)
|
||||
}
|
||||
m.Timestamps = append(m.Timestamps, int64(res.TV[0].(float64)))
|
||||
m.Values = append(m.Values, f)
|
||||
result[i] = m
|
||||
result = append(result, m)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
@@ -147,16 +149,6 @@ func (s *VMStorage) setPrometheusInstantReqParams(r *http.Request, query string,
|
||||
timestamp = timestamp.Truncate(s.evaluationInterval)
|
||||
}
|
||||
q.Set("time", fmt.Sprintf("%d", timestamp.Unix()))
|
||||
if s.evaluationInterval > 0 { // set step as evaluationInterval by default
|
||||
// always convert to seconds to keep compatibility with older
|
||||
// Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943
|
||||
q.Set("step", fmt.Sprintf("%ds", int(s.evaluationInterval.Seconds())))
|
||||
}
|
||||
if s.queryStep > 0 { // override step with user-specified value
|
||||
// always convert to seconds to keep compatibility with older
|
||||
// Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943
|
||||
q.Set("step", fmt.Sprintf("%ds", int(s.queryStep.Seconds())))
|
||||
}
|
||||
r.URL.RawQuery = q.Encode()
|
||||
s.setPrometheusReqParams(r, query)
|
||||
}
|
||||
@@ -171,11 +163,6 @@ func (s *VMStorage) setPrometheusRangeReqParams(r *http.Request, query string, s
|
||||
q := r.URL.Query()
|
||||
q.Add("start", fmt.Sprintf("%d", start.Unix()))
|
||||
q.Add("end", fmt.Sprintf("%d", end.Unix()))
|
||||
if s.evaluationInterval > 0 { // set step as evaluationInterval by default
|
||||
// always convert to seconds to keep compatibility with older
|
||||
// Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943
|
||||
q.Set("step", fmt.Sprintf("%ds", int(s.evaluationInterval.Seconds())))
|
||||
}
|
||||
r.URL.RawQuery = q.Encode()
|
||||
s.setPrometheusReqParams(r, query)
|
||||
}
|
||||
@@ -191,5 +178,15 @@ func (s *VMStorage) setPrometheusReqParams(r *http.Request, query string) {
|
||||
}
|
||||
}
|
||||
q.Set("query", query)
|
||||
if s.evaluationInterval > 0 { // set step as evaluationInterval by default
|
||||
// always convert to seconds to keep compatibility with older
|
||||
// Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943
|
||||
q.Set("step", fmt.Sprintf("%ds", int(s.evaluationInterval.Seconds())))
|
||||
}
|
||||
if s.queryStep > 0 { // override step with user-specified value
|
||||
// always convert to seconds to keep compatibility with older
|
||||
// Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943
|
||||
q.Set("step", fmt.Sprintf("%ds", int(s.queryStep.Seconds())))
|
||||
}
|
||||
r.URL.RawQuery = q.Encode()
|
||||
}
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
package datasource
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkMetrics(b *testing.B) {
|
||||
payload := []byte(`[{"metric":{"__name__":"vm_rows"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests", "foo":"bar", "baz": "qux"},"value":[1583786140,"2000"]}]`)
|
||||
|
||||
var pi promInstant
|
||||
if err := json.Unmarshal(payload, &pi.Result); err != nil {
|
||||
b.Fatalf(err.Error())
|
||||
}
|
||||
b.Run("Instant", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
_, _ = pi.metrics()
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -7,7 +7,6 @@ import (
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
@@ -75,7 +74,7 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
case 5:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix"}}`))
|
||||
case 6:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"vm_rows","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"vm_rows"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests"},"value":[1583786140,"2000"]}]}}`))
|
||||
case 7:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
|
||||
}
|
||||
@@ -95,7 +94,7 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
ts := time.Now()
|
||||
|
||||
expErr := func(err string) {
|
||||
if _, _, err := pq.Query(ctx, query, ts); err == nil {
|
||||
if _, err := pq.Query(ctx, query, ts); err == nil {
|
||||
t.Fatalf("expected %q got nil", err)
|
||||
}
|
||||
}
|
||||
@@ -107,7 +106,7 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
expErr("unknown status") // 4
|
||||
expErr("non-vector resultType error") // 5
|
||||
|
||||
m, _, err := pq.Query(ctx, query, ts) // 6 - vector
|
||||
m, err := pq.Query(ctx, query, ts) // 6 - vector
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
@@ -116,25 +115,24 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
}
|
||||
expected := []Metric{
|
||||
{
|
||||
Labels: []Label{{Value: "vm_rows", Name: "__name__"}, {Value: "bar", Name: "foo"}},
|
||||
Labels: []Label{{Value: "vm_rows", Name: "__name__"}},
|
||||
Timestamps: []int64{1583786142},
|
||||
Values: []float64{13763},
|
||||
},
|
||||
{
|
||||
Labels: []Label{{Value: "vm_requests", Name: "__name__"}, {Value: "baz", Name: "foo"}},
|
||||
Labels: []Label{{Value: "vm_requests", Name: "__name__"}},
|
||||
Timestamps: []int64{1583786140},
|
||||
Values: []float64{2000},
|
||||
},
|
||||
}
|
||||
metricsEqual(t, m, expected)
|
||||
if !reflect.DeepEqual(m, expected) {
|
||||
t.Fatalf("unexpected metric %+v want %+v", m, expected)
|
||||
}
|
||||
|
||||
m, req, err := pq.Query(ctx, query, ts) // 7 - scalar
|
||||
m, err = pq.Query(ctx, query, ts) // 7 - scalar
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
if req == nil {
|
||||
t.Fatalf("expected request to be non-nil")
|
||||
}
|
||||
if len(m) != 1 {
|
||||
t.Fatalf("expected 1 metrics got %d in %+v", len(m), m)
|
||||
}
|
||||
@@ -150,43 +148,20 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
|
||||
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
|
||||
|
||||
m, _, err = gq.Query(ctx, queryRender, ts) // 8 - graphite
|
||||
m, err = gq.Query(ctx, queryRender, ts) // 8 - graphite
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
if len(m) != 1 {
|
||||
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
|
||||
}
|
||||
exp := []Metric{
|
||||
{
|
||||
Labels: []Label{{Value: "constantLine(10)", Name: "name"}},
|
||||
Timestamps: []int64{1611758403},
|
||||
Values: []float64{10},
|
||||
},
|
||||
exp := Metric{
|
||||
Labels: []Label{{Value: "constantLine(10)", Name: "name"}},
|
||||
Timestamps: []int64{1611758403},
|
||||
Values: []float64{10},
|
||||
}
|
||||
metricsEqual(t, m, exp)
|
||||
}
|
||||
|
||||
func metricsEqual(t *testing.T, gotM, expectedM []Metric) {
|
||||
for i, exp := range expectedM {
|
||||
got := gotM[i]
|
||||
gotTS, expTS := got.Timestamps, exp.Timestamps
|
||||
if !reflect.DeepEqual(gotTS, expTS) {
|
||||
t.Fatalf("unexpected timestamps %+v want %+v", gotTS, expTS)
|
||||
}
|
||||
gotV, expV := got.Values, exp.Values
|
||||
if !reflect.DeepEqual(gotV, expV) {
|
||||
t.Fatalf("unexpected values %+v want %+v", gotV, expV)
|
||||
}
|
||||
sort.Slice(got.Labels, func(i, j int) bool {
|
||||
return got.Labels[i].Name < got.Labels[j].Name
|
||||
})
|
||||
sort.Slice(exp.Labels, func(i, j int) bool {
|
||||
return exp.Labels[i].Name < exp.Labels[j].Name
|
||||
})
|
||||
if !reflect.DeepEqual(exp.Labels, got.Labels) {
|
||||
t.Fatalf("unexpected labels %+v want %+v", got.Labels, exp.Labels)
|
||||
}
|
||||
if !reflect.DeepEqual(m[0], exp) {
|
||||
t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -221,10 +196,6 @@ func TestVMRangeQuery(t *testing.T) {
|
||||
if _, err := strconv.ParseInt(endTS, 10, 64); err != nil {
|
||||
t.Errorf("failed to parse 'end' query param: %s", err)
|
||||
}
|
||||
step := r.URL.Query().Get("step")
|
||||
if step != "15s" {
|
||||
t.Errorf("expected 'step' query param to be 15s; got %q instead", step)
|
||||
}
|
||||
switch c {
|
||||
case 0:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
|
||||
@@ -238,7 +209,7 @@ func TestVMRangeQuery(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected: %s", err)
|
||||
}
|
||||
s := NewVMStorage(srv.URL, authCfg, time.Minute, *queryStep, false, srv.Client())
|
||||
s := NewVMStorage(srv.URL, authCfg, time.Minute, 0, false, srv.Client())
|
||||
|
||||
pq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourcePrometheus), EvaluationInterval: 15 * time.Second})
|
||||
|
||||
|
||||
@@ -168,12 +168,9 @@ func (g *Group) Restore(ctx context.Context, qb datasource.QuerierBuilder, lookb
|
||||
if rr.For < 1 {
|
||||
continue
|
||||
}
|
||||
// ignore QueryParams on purpose, because they could contain
|
||||
// query filters. This may affect the restore procedure.
|
||||
q := qb.BuildWithParams(datasource.QuerierParams{
|
||||
DataSourceType: g.Type.String(),
|
||||
Headers: g.Headers,
|
||||
})
|
||||
// ignore g.ExtraFilterLabels on purpose, so it
|
||||
// won't affect the restore procedure.
|
||||
q := qb.BuildWithParams(datasource.QuerierParams{})
|
||||
if err := rr.Restore(ctx, q, lookback, labels); err != nil {
|
||||
return fmt.Errorf("error while restoring rule %q: %w", rule, err)
|
||||
}
|
||||
@@ -421,26 +418,20 @@ func (e *executor) exec(ctx context.Context, rule Rule, ts time.Time, resolveDur
|
||||
return fmt.Errorf("rule %q: failed to execute: %w", rule, err)
|
||||
}
|
||||
|
||||
errGr := new(utils.ErrGroup)
|
||||
if e.rw != nil {
|
||||
pushToRW := func(tss []prompbmarshal.TimeSeries) error {
|
||||
var lastErr error
|
||||
pushToRW := func(tss []prompbmarshal.TimeSeries) {
|
||||
for _, ts := range tss {
|
||||
remoteWriteTotal.Inc()
|
||||
if err := e.rw.Push(ts); err != nil {
|
||||
remoteWriteErrors.Inc()
|
||||
lastErr = fmt.Errorf("rule %q: remote write failure: %w", rule, err)
|
||||
errGr.Add(fmt.Errorf("rule %q: remote write failure: %w", rule, err))
|
||||
}
|
||||
}
|
||||
return lastErr
|
||||
}
|
||||
if err := pushToRW(tss); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
pushToRW(tss)
|
||||
staleSeries := e.getStaleSeries(rule, tss, ts)
|
||||
if err := pushToRW(staleSeries); err != nil {
|
||||
return err
|
||||
}
|
||||
pushToRW(staleSeries)
|
||||
}
|
||||
|
||||
ar, ok := rule.(*AlertingRule)
|
||||
@@ -454,7 +445,6 @@ func (e *executor) exec(ctx context.Context, rule Rule, ts time.Time, resolveDur
|
||||
}
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
errGr := new(utils.ErrGroup)
|
||||
for _, nt := range e.notifiers() {
|
||||
wg.Add(1)
|
||||
go func(nt notifier.Notifier) {
|
||||
|
||||
@@ -3,6 +3,8 @@ package main
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"reflect"
|
||||
"sort"
|
||||
"testing"
|
||||
@@ -10,9 +12,6 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
)
|
||||
|
||||
@@ -453,24 +452,3 @@ func TestFaultyNotifier(t *testing.T) {
|
||||
}
|
||||
t.Fatalf("alive notifier didn't receive notification by %v", deadline)
|
||||
}
|
||||
|
||||
func TestFaultyRW(t *testing.T) {
|
||||
fq := &fakeQuerier{}
|
||||
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
||||
|
||||
r := &RecordingRule{
|
||||
Name: "test",
|
||||
state: newRuleState(10),
|
||||
q: fq,
|
||||
}
|
||||
|
||||
e := &executor{
|
||||
rw: &remotewrite.Client{},
|
||||
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
|
||||
}
|
||||
|
||||
err := e.exec(context.Background(), r, time.Now(), 0, 10)
|
||||
if err == nil {
|
||||
t.Fatalf("expected to get an error from faulty RW client, got nil instead")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,6 @@ package main
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"reflect"
|
||||
"sort"
|
||||
"sync"
|
||||
@@ -45,20 +44,18 @@ func (fq *fakeQuerier) BuildWithParams(_ datasource.QuerierParams) datasource.Qu
|
||||
}
|
||||
|
||||
func (fq *fakeQuerier) QueryRange(ctx context.Context, q string, _, _ time.Time) ([]datasource.Metric, error) {
|
||||
req, _, err := fq.Query(ctx, q, time.Now())
|
||||
return req, err
|
||||
return fq.Query(ctx, q, time.Now())
|
||||
}
|
||||
|
||||
func (fq *fakeQuerier) Query(_ context.Context, _ string, _ time.Time) ([]datasource.Metric, *http.Request, error) {
|
||||
func (fq *fakeQuerier) Query(_ context.Context, _ string, _ time.Time) ([]datasource.Metric, error) {
|
||||
fq.Lock()
|
||||
defer fq.Unlock()
|
||||
if fq.err != nil {
|
||||
return nil, nil, fq.err
|
||||
return nil, fq.err
|
||||
}
|
||||
cp := make([]datasource.Metric, len(fq.metrics))
|
||||
copy(cp, fq.metrics)
|
||||
req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
|
||||
return cp, req, nil
|
||||
return cp, nil
|
||||
}
|
||||
|
||||
type fakeNotifier struct {
|
||||
|
||||
@@ -28,7 +28,7 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
rulePath = flagutil.NewArrayString("rule", `Path to the file with alert rules.
|
||||
rulePath = flagutil.NewArray("rule", `Path to the file with alert rules.
|
||||
Supports patterns. Flag can be specified multiple times.
|
||||
Examples:
|
||||
-rule="/path/to/file". Path to a single file with alerting rules
|
||||
@@ -36,7 +36,7 @@ Examples:
|
||||
absolute path to all .yaml files in root.
|
||||
Rule files may contain %{ENV_VAR} placeholders, which are substituted by the corresponding env vars.`)
|
||||
|
||||
ruleTemplatesPath = flagutil.NewArrayString("rule.templates", `Path or glob pattern to location with go template definitions
|
||||
ruleTemplatesPath = flagutil.NewArray("rule.templates", `Path or glob pattern to location with go template definitions
|
||||
for rules annotations templating. Flag can be specified multiple times.
|
||||
Examples:
|
||||
-rule.templates="/path/to/file". Path to a single file with go templates
|
||||
@@ -56,17 +56,13 @@ absolute path to all .tpl files in root.`)
|
||||
validateExpressions = flag.Bool("rule.validateExpressions", true, "Whether to validate rules expressions via MetricsQL engine")
|
||||
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maximum duration for automatic alert expiration, "+
|
||||
"which is by default equal to 3 evaluation intervals of the parent group.")
|
||||
resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier")
|
||||
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
|
||||
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overriden per rule via update_entries_limit param.")
|
||||
resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier")
|
||||
|
||||
externalURL = flag.String("external.url", "", "External URL is used as alert's source for sent alerts to the notifier")
|
||||
externalAlertSource = flag.String("external.alert.source", "", `External Alert Source allows to override the Source link for alerts sent to AlertManager `+
|
||||
`for cases where you want to build a custom link to Grafana, Prometheus or any other service. `+
|
||||
`Supports templating - see https://docs.victoriametrics.com/vmalert.html#templating . `+
|
||||
`For example, link to Grafana: -external.alert.source='explore?orgId=1&left=["now-1h","now","VictoriaMetrics",{"expr":{{$expr|jsonEscape|queryEscape}} },{"mode":"Metrics"},{"ui":[true,true,true,"none"]}]' . `+
|
||||
`If empty 'vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}' is used.`)
|
||||
externalLabels = flagutil.NewArrayString("external.label", "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. "+
|
||||
externalAlertSource = flag.String("external.alert.source", "", `External Alert Source allows to override the Source link for alerts sent to AlertManager for cases where you want to build a custom link to Grafana, Prometheus or any other service.
|
||||
Supports templating. For example, link to Grafana: 'explore?orgId=1&left=[\"now-1h\",\"now\",\"VictoriaMetrics\",{\"expr\": \"{{$expr|quotesEscape|crlfEscape|queryEscape}}\"},{\"mode\":\"Metrics\"},{\"ui\":[true,true,true,\"none\"]}]'.
|
||||
If empty 'vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}' is used.`)
|
||||
externalLabels = flagutil.NewArray("external.label", "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. "+
|
||||
"Pass multiple -label flags in order to add multiple label sets.")
|
||||
|
||||
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries."+
|
||||
@@ -75,7 +71,7 @@ absolute path to all .tpl files in root.`)
|
||||
|
||||
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
|
||||
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.")
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The `-rule` flag must be specified.")
|
||||
)
|
||||
|
||||
var alertURLGeneratorFn notifier.AlertURLGenerator
|
||||
@@ -268,7 +264,7 @@ func getAlertURLGenerator(externalURL *url.URL, externalAlertSource string, vali
|
||||
"tpl": externalAlertSource,
|
||||
}
|
||||
return func(alert notifier.Alert) string {
|
||||
templated, err := alert.ExecTemplate(nil, alert.Labels, m)
|
||||
templated, err := alert.ExecTemplate(nil, nil, m)
|
||||
if err != nil {
|
||||
logger.Errorf("can not exec source template %s", err)
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@ func TestGetExternalURL(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestGetAlertURLGenerator(t *testing.T) {
|
||||
testAlert := notifier.Alert{GroupID: 42, ID: 2, Value: 4, Labels: map[string]string{"tenant": "baz"}}
|
||||
testAlert := notifier.Alert{GroupID: 42, ID: 2, Value: 4}
|
||||
u, _ := url.Parse("https://victoriametrics.com/path")
|
||||
fn, err := getAlertURLGenerator(u, "", false)
|
||||
if err != nil {
|
||||
@@ -48,11 +48,11 @@ func TestGetAlertURLGenerator(t *testing.T) {
|
||||
if err == nil {
|
||||
t.Errorf("expected template validation error got nil")
|
||||
}
|
||||
fn, err = getAlertURLGenerator(u, "foo?query={{$value}}&ds={{ $labels.tenant }}", true)
|
||||
fn, err = getAlertURLGenerator(u, "foo?query={{$value}}", true)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error %s", err)
|
||||
}
|
||||
if exp := "https://victoriametrics.com/path/foo?query=4&ds=baz"; exp != fn(testAlert) {
|
||||
if exp := "https://victoriametrics.com/path/foo?query=4"; exp != fn(testAlert) {
|
||||
t.Errorf("unexpected url want %s, got %s", exp, fn(testAlert))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,23 +30,6 @@ type manager struct {
|
||||
groups map[uint64]*Group
|
||||
}
|
||||
|
||||
// RuleAPI generates APIRule object from alert by its ID(hash)
|
||||
func (m *manager) RuleAPI(gID, rID uint64) (APIRule, error) {
|
||||
m.groupsMu.RLock()
|
||||
defer m.groupsMu.RUnlock()
|
||||
|
||||
g, ok := m.groups[gID]
|
||||
if !ok {
|
||||
return APIRule{}, fmt.Errorf("can't find group with id %d", gID)
|
||||
}
|
||||
for _, rule := range g.Rules {
|
||||
if rule.ID() == rID {
|
||||
return rule.ToAPI(), nil
|
||||
}
|
||||
}
|
||||
return APIRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
|
||||
}
|
||||
|
||||
// AlertAPI generates APIAlert object from alert by its ID(hash)
|
||||
func (m *manager) AlertAPI(gID, aID uint64) (*APIAlert, error) {
|
||||
m.groupsMu.RLock()
|
||||
@@ -87,9 +70,9 @@ func (m *manager) startGroup(ctx context.Context, group *Group, restore bool) er
|
||||
err := group.Restore(ctx, m.rr, *remoteReadLookBack, m.labels)
|
||||
if err != nil {
|
||||
if !*remoteReadIgnoreRestoreErrors {
|
||||
return fmt.Errorf("failed to restore ruleState for group %q: %w", group.Name, err)
|
||||
return fmt.Errorf("failed to restore state for group %q: %w", group.Name, err)
|
||||
}
|
||||
logger.Errorf("error while restoring ruleState for group %q: %s", group.Name, err)
|
||||
logger.Errorf("error while restoring state for group %q: %s", group.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -45,8 +45,6 @@ type Alert struct {
|
||||
ID uint64
|
||||
// Restored is true if Alert was restored after restart
|
||||
Restored bool
|
||||
// For defines for how long Alert needs to be active to become StateFiring
|
||||
For time.Duration
|
||||
}
|
||||
|
||||
// AlertState type indicates the Alert state
|
||||
@@ -82,7 +80,6 @@ type AlertTplData struct {
|
||||
AlertID uint64
|
||||
GroupID uint64
|
||||
ActiveAt time.Time
|
||||
For time.Duration
|
||||
}
|
||||
|
||||
var tplHeaders = []string{
|
||||
@@ -94,7 +91,6 @@ var tplHeaders = []string{
|
||||
"{{ $alertID := .AlertID }}",
|
||||
"{{ $groupID := .GroupID }}",
|
||||
"{{ $activeAt := .ActiveAt }}",
|
||||
"{{ $for := .For }}",
|
||||
}
|
||||
|
||||
// ExecTemplate executes the Alert template for given
|
||||
@@ -102,15 +98,7 @@ var tplHeaders = []string{
|
||||
// Every alert could have a different datasource, so function
|
||||
// requires a queryFunction as an argument.
|
||||
func (a *Alert) ExecTemplate(q templates.QueryFn, labels, annotations map[string]string) (map[string]string, error) {
|
||||
tplData := AlertTplData{
|
||||
Value: a.Value,
|
||||
Labels: labels,
|
||||
Expr: a.Expr,
|
||||
AlertID: a.ID,
|
||||
GroupID: a.GroupID,
|
||||
ActiveAt: a.ActiveAt,
|
||||
For: a.For,
|
||||
}
|
||||
tplData := AlertTplData{Value: a.Value, Labels: labels, Expr: a.Expr, AlertID: a.ID, GroupID: a.GroupID, ActiveAt: a.ActiveAt}
|
||||
tmpl, err := templates.GetWithFuncs(templates.FuncsWithQuery(q))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting a template: %w", err)
|
||||
@@ -195,9 +183,9 @@ func (a Alert) toPromLabels(relabelCfg *promrelabel.ParsedConfigs) []prompbmarsh
|
||||
Value: v,
|
||||
})
|
||||
}
|
||||
if relabelCfg != nil {
|
||||
labels = relabelCfg.Apply(labels, 0)
|
||||
}
|
||||
promrelabel.SortLabels(labels)
|
||||
if relabelCfg != nil {
|
||||
return relabelCfg.Apply(labels, 0, false)
|
||||
}
|
||||
return labels
|
||||
}
|
||||
|
||||
@@ -54,35 +54,28 @@ func TestAlert_ExecTemplate(t *testing.T) {
|
||||
"job": "staging",
|
||||
"instance": "localhost",
|
||||
},
|
||||
For: 5 * time.Minute,
|
||||
},
|
||||
annotations: map[string]string{
|
||||
"summary": "Too high connection number for {{$labels.instance}} for job {{$labels.job}}",
|
||||
"description": "It is {{ $value }} connections for {{$labels.instance}} for more than {{ .For }}",
|
||||
"description": "It is {{ $value }} connections for {{$labels.instance}}",
|
||||
},
|
||||
expTpl: map[string]string{
|
||||
"summary": "Too high connection number for localhost for job staging",
|
||||
"description": "It is 10000 connections for localhost for more than 5m0s",
|
||||
"description": "It is 10000 connections for localhost",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "expression-template",
|
||||
alert: &Alert{
|
||||
Expr: `vm_rows{"label"="bar"}<0`,
|
||||
Expr: `vm_rows{"label"="bar"}>0`,
|
||||
},
|
||||
annotations: map[string]string{
|
||||
"exprEscapedQuery": "{{ $expr|queryEscape }}",
|
||||
"exprEscapedPath": "{{ $expr|pathEscape }}",
|
||||
"exprEscapedJSON": "{{ $expr|jsonEscape }}",
|
||||
"exprEscapedQuotes": "{{ $expr|quotesEscape }}",
|
||||
"exprEscapedHTML": "{{ $expr|htmlEscape }}",
|
||||
"exprEscapedQuery": "{{ $expr|quotesEscape|queryEscape }}",
|
||||
"exprEscapedPath": "{{ $expr|quotesEscape|pathEscape }}",
|
||||
},
|
||||
expTpl: map[string]string{
|
||||
"exprEscapedQuery": "vm_rows%7B%22label%22%3D%22bar%22%7D%3C0",
|
||||
"exprEscapedPath": "vm_rows%7B%22label%22=%22bar%22%7D%3C0",
|
||||
"exprEscapedJSON": `"vm_rows{\"label\"=\"bar\"}\u003c0"`,
|
||||
"exprEscapedQuotes": `vm_rows{\"label\"=\"bar\"}\u003c0`,
|
||||
"exprEscapedHTML": "vm_rows{"label"="bar"}<0",
|
||||
"exprEscapedQuery": "vm_rows%7B%5C%22label%5C%22%3D%5C%22bar%5C%22%7D%3E0",
|
||||
"exprEscapedPath": "vm_rows%7B%5C%22label%5C%22=%5C%22bar%5C%22%7D%3E0",
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -153,7 +146,7 @@ func TestAlert_ExecTemplate(t *testing.T) {
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ActiveAt custom format",
|
||||
name: "ActiveAt custome format",
|
||||
alert: &Alert{
|
||||
ActiveAt: time.Date(2022, 8, 19, 20, 34, 58, 651387237, time.UTC),
|
||||
},
|
||||
@@ -240,7 +233,7 @@ func TestAlert_toPromLabels(t *testing.T) {
|
||||
replacement: "aaa"
|
||||
- action: labeldrop
|
||||
regex: "env.*"
|
||||
`))
|
||||
`), false)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
@@ -15,10 +15,10 @@
|
||||
"endsAt":{%q= alert.End.Format(time.RFC3339Nano) %},
|
||||
{% endif %}
|
||||
"labels": {
|
||||
"alertname":{%q= alert.Name %}
|
||||
{% code lbls := alert.toPromLabels(relabelCfg) %}
|
||||
{% code ll := len(lbls) %}
|
||||
{% for idx, l := range lbls %}
|
||||
{%q= l.Name %}:{%q= l.Value %}{% if idx != ll-1 %}, {% endif %}
|
||||
{% for _, l := range lbls %}
|
||||
,{%q= l.Name %}:{%q= l.Value %}
|
||||
{% endfor %}
|
||||
},
|
||||
"annotations": {
|
||||
|
||||
@@ -51,27 +51,22 @@ func streamamRequest(qw422016 *qt422016.Writer, alerts []Alert, generatorURL fun
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:16
|
||||
}
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:16
|
||||
qw422016.N().S(`"labels": {`)
|
||||
qw422016.N().S(`"labels": {"alertname":`)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:18
|
||||
qw422016.N().Q(alert.Name)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:19
|
||||
lbls := alert.toPromLabels(relabelCfg)
|
||||
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:19
|
||||
ll := len(lbls)
|
||||
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:20
|
||||
for idx, l := range lbls {
|
||||
for _, l := range lbls {
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:20
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:21
|
||||
qw422016.N().Q(l.Name)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:21
|
||||
qw422016.N().S(`:`)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:21
|
||||
qw422016.N().Q(l.Value)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:21
|
||||
if idx != ll-1 {
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:21
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:21
|
||||
}
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:22
|
||||
}
|
||||
//line app/vmalert/notifier/alertmanager_request.qtpl:22
|
||||
|
||||
@@ -67,6 +67,9 @@ func TestAlertManager_Send(t *testing.T) {
|
||||
if a[0].GeneratorURL != "0/0" {
|
||||
t.Errorf("expected 0/0 as generatorURL got %s", a[0].GeneratorURL)
|
||||
}
|
||||
if a[0].Labels["alertname"] != "alert0" {
|
||||
t.Errorf("expected alert0 as alert name got %s", a[0].Labels["alertname"])
|
||||
}
|
||||
if a[0].StartsAt.IsZero() {
|
||||
t.Errorf("expected non-zero start time")
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns"
|
||||
@@ -67,8 +68,6 @@ type Config struct {
|
||||
// [ - '<host>' ]
|
||||
type StaticConfig struct {
|
||||
Targets []string `yaml:"targets"`
|
||||
// HTTPClientConfig contains HTTP configuration for the Targets
|
||||
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
|
||||
}
|
||||
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||
@@ -83,12 +82,12 @@ func (cfg *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
if cfg.Timeout.Duration() == 0 {
|
||||
cfg.Timeout = promutils.NewDuration(time.Second * 10)
|
||||
}
|
||||
rCfg, err := promrelabel.ParseRelabelConfigs(cfg.RelabelConfigs)
|
||||
rCfg, err := promrelabel.ParseRelabelConfigs(cfg.RelabelConfigs, false)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse relabeling config: %w", err)
|
||||
}
|
||||
cfg.parsedRelabelConfigs = rCfg
|
||||
arCfg, err := promrelabel.ParseRelabelConfigs(cfg.AlertRelabelConfigs)
|
||||
arCfg, err := promrelabel.ParseRelabelConfigs(cfg.AlertRelabelConfigs, false)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse alert relabeling config: %w", err)
|
||||
}
|
||||
@@ -129,24 +128,23 @@ func parseConfig(path string) (*Config, error) {
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
func parseLabels(target string, metaLabels *promutils.Labels, cfg *Config) (string, *promutils.Labels, error) {
|
||||
func parseLabels(target string, metaLabels map[string]string, cfg *Config) (string, []prompbmarshal.Label, error) {
|
||||
labels := mergeLabels(target, metaLabels, cfg)
|
||||
labels.Labels = cfg.parsedRelabelConfigs.Apply(labels.Labels, 0)
|
||||
labels.RemoveMetaLabels()
|
||||
labels.Sort()
|
||||
labels = cfg.parsedRelabelConfigs.Apply(labels, 0, false)
|
||||
labels = promrelabel.RemoveMetaLabels(labels[:0], labels)
|
||||
// Remove references to already deleted labels, so GC could clean strings for label name and label value past len(labels).
|
||||
// This should reduce memory usage when relabeling creates big number of temporary labels with long names and/or values.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/825 for details.
|
||||
labels = labels.Clone()
|
||||
labels = append([]prompbmarshal.Label{}, labels...)
|
||||
|
||||
if labels.Len() == 0 {
|
||||
if len(labels) == 0 {
|
||||
return "", nil, nil
|
||||
}
|
||||
schemeRelabeled := labels.Get("__scheme__")
|
||||
schemeRelabeled := promrelabel.GetLabelValueByName(labels, "__scheme__")
|
||||
if len(schemeRelabeled) == 0 {
|
||||
schemeRelabeled = "http"
|
||||
}
|
||||
addressRelabeled := labels.Get("__address__")
|
||||
addressRelabeled := promrelabel.GetLabelValueByName(labels, "__address__")
|
||||
if len(addressRelabeled) == 0 {
|
||||
return "", nil, nil
|
||||
}
|
||||
@@ -154,7 +152,7 @@ func parseLabels(target string, metaLabels *promutils.Labels, cfg *Config) (stri
|
||||
return "", nil, nil
|
||||
}
|
||||
addressRelabeled = addMissingPort(schemeRelabeled, addressRelabeled)
|
||||
alertsPathRelabeled := labels.Get("__alerts_path__")
|
||||
alertsPathRelabeled := promrelabel.GetLabelValueByName(labels, "__alerts_path__")
|
||||
if !strings.HasPrefix(alertsPathRelabeled, "/") {
|
||||
alertsPathRelabeled = "/" + alertsPathRelabeled
|
||||
}
|
||||
@@ -178,12 +176,21 @@ func addMissingPort(scheme, target string) string {
|
||||
return target
|
||||
}
|
||||
|
||||
func mergeLabels(target string, metaLabels *promutils.Labels, cfg *Config) *promutils.Labels {
|
||||
func mergeLabels(target string, metaLabels map[string]string, cfg *Config) []prompbmarshal.Label {
|
||||
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
|
||||
m := promutils.NewLabels(3 + metaLabels.Len())
|
||||
m.Add("__address__", target)
|
||||
m.Add("__scheme__", cfg.Scheme)
|
||||
m.Add("__alerts_path__", path.Join("/", cfg.PathPrefix, alertManagerPath))
|
||||
m.AddFrom(metaLabels)
|
||||
return m
|
||||
m := make(map[string]string)
|
||||
m["__address__"] = target
|
||||
m["__scheme__"] = cfg.Scheme
|
||||
m["__alerts_path__"] = path.Join("/", cfg.PathPrefix, alertManagerPath)
|
||||
for k, v := range metaLabels {
|
||||
m[k] = v
|
||||
}
|
||||
result := make([]prompbmarshal.Label, 0, len(m))
|
||||
for k, v := range m {
|
||||
result = append(result, prompbmarshal.Label{
|
||||
Name: k,
|
||||
Value: v,
|
||||
})
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
@@ -6,10 +6,8 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
)
|
||||
|
||||
// configWatcher supports dynamic reload of Notifier objects
|
||||
@@ -124,7 +122,7 @@ func targetsFromLabels(labelsFn getLabels, cfg *Config, genFn AlertURLGenerator)
|
||||
var errors []error
|
||||
duplicates := make(map[string]struct{})
|
||||
for _, labels := range metaLabels {
|
||||
target := labels.Get("__address__")
|
||||
target := labels["__address__"]
|
||||
u, processedLabels, err := parseLabels(target, labels, cfg)
|
||||
if err != nil {
|
||||
errors = append(errors, err)
|
||||
@@ -157,19 +155,18 @@ func targetsFromLabels(labelsFn getLabels, cfg *Config, genFn AlertURLGenerator)
|
||||
return targets, errors
|
||||
}
|
||||
|
||||
type getLabels func() ([]*promutils.Labels, error)
|
||||
type getLabels func() ([]map[string]string, error)
|
||||
|
||||
func (cw *configWatcher) start() error {
|
||||
if len(cw.cfg.StaticConfigs) > 0 {
|
||||
var targets []Target
|
||||
for _, cfg := range cw.cfg.StaticConfigs {
|
||||
httpCfg := mergeHTTPClientConfigs(cw.cfg.HTTPClientConfig, cfg.HTTPClientConfig)
|
||||
for _, target := range cfg.Targets {
|
||||
address, labels, err := parseLabels(target, nil, cw.cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse labels for target %q: %s", target, err)
|
||||
}
|
||||
notifier, err := NewAlertManager(address, cw.genFn, httpCfg, cw.cfg.parsedAlertRelabelConfigs, cw.cfg.Timeout.Duration())
|
||||
notifier, err := NewAlertManager(address, cw.genFn, cw.cfg.HTTPClientConfig, cw.cfg.parsedAlertRelabelConfigs, cw.cfg.Timeout.Duration())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to init alertmanager for addr %q: %s", address, err)
|
||||
}
|
||||
@@ -183,8 +180,8 @@ func (cw *configWatcher) start() error {
|
||||
}
|
||||
|
||||
if len(cw.cfg.ConsulSDConfigs) > 0 {
|
||||
err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([]*promutils.Labels, error) {
|
||||
var labels []*promutils.Labels
|
||||
err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([]map[string]string, error) {
|
||||
var labels []map[string]string
|
||||
for i := range cw.cfg.ConsulSDConfigs {
|
||||
sdc := &cw.cfg.ConsulSDConfigs[i]
|
||||
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
|
||||
@@ -201,8 +198,8 @@ func (cw *configWatcher) start() error {
|
||||
}
|
||||
|
||||
if len(cw.cfg.DNSSDConfigs) > 0 {
|
||||
err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([]*promutils.Labels, error) {
|
||||
var labels []*promutils.Labels
|
||||
err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([]map[string]string, error) {
|
||||
var labels []map[string]string
|
||||
for i := range cw.cfg.DNSSDConfigs {
|
||||
sdc := &cw.cfg.DNSSDConfigs[i]
|
||||
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
|
||||
@@ -255,30 +252,3 @@ func (cw *configWatcher) setTargets(key TargetType, targets []Target) {
|
||||
cw.targets[key] = targets
|
||||
cw.targetsMu.Unlock()
|
||||
}
|
||||
|
||||
// mergeHTTPClientConfigs merges fields between child and parent params
|
||||
// by populating child from parent params if they're missing.
|
||||
func mergeHTTPClientConfigs(parent, child promauth.HTTPClientConfig) promauth.HTTPClientConfig {
|
||||
if child.Authorization == nil {
|
||||
child.Authorization = parent.Authorization
|
||||
}
|
||||
if child.BasicAuth == nil {
|
||||
child.BasicAuth = parent.BasicAuth
|
||||
}
|
||||
if child.BearerToken == nil {
|
||||
child.BearerToken = parent.BearerToken
|
||||
}
|
||||
if child.BearerTokenFile == "" {
|
||||
child.BearerTokenFile = parent.BearerTokenFile
|
||||
}
|
||||
if child.OAuth2 == nil {
|
||||
child.OAuth2 = parent.OAuth2
|
||||
}
|
||||
if child.TLSConfig == nil {
|
||||
child.TLSConfig = parent.TLSConfig
|
||||
}
|
||||
if child.Headers == nil {
|
||||
child.Headers = parent.Headers
|
||||
}
|
||||
return child
|
||||
}
|
||||
|
||||
@@ -8,8 +8,6 @@ import (
|
||||
"os"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
)
|
||||
|
||||
func TestConfigWatcherReload(t *testing.T) {
|
||||
@@ -300,20 +298,3 @@ func newFakeConsulServer() *httptest.Server {
|
||||
|
||||
return httptest.NewServer(mux)
|
||||
}
|
||||
|
||||
func TestMergeHTTPClientConfigs(t *testing.T) {
|
||||
cfg1 := promauth.HTTPClientConfig{Headers: []string{"Header:Foo"}}
|
||||
cfg2 := promauth.HTTPClientConfig{BasicAuth: &promauth.BasicAuthConfig{
|
||||
Username: "foo",
|
||||
Password: promauth.NewSecret("bar"),
|
||||
}}
|
||||
|
||||
result := mergeHTTPClientConfigs(cfg1, cfg2)
|
||||
|
||||
if result.Headers == nil {
|
||||
t.Fatalf("expected Headers to be inherited")
|
||||
}
|
||||
if result.BasicAuth == nil {
|
||||
t.Fatalf("expected BasicAuth tp be present")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,40 +10,39 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
)
|
||||
|
||||
var (
|
||||
configPath = flag.String("notifier.config", "", "Path to configuration file for notifiers")
|
||||
suppressDuplicateTargetErrors = flag.Bool("notifier.suppressDuplicateTargetErrors", false, "Whether to suppress 'duplicate target' errors during discovery")
|
||||
|
||||
addrs = flagutil.NewArrayString("notifier.url", "Prometheus Alertmanager URL, e.g. http://127.0.0.1:9093. "+
|
||||
"List all Alertmanager URLs if it runs in the cluster mode to ensure high availability.")
|
||||
addrs = flagutil.NewArray("notifier.url", "Prometheus alertmanager URL, e.g. http://127.0.0.1:9093")
|
||||
|
||||
basicAuthUsername = flagutil.NewArrayString("notifier.basicAuth.username", "Optional basic auth username for -notifier.url")
|
||||
basicAuthPassword = flagutil.NewArrayString("notifier.basicAuth.password", "Optional basic auth password for -notifier.url")
|
||||
basicAuthPasswordFile = flagutil.NewArrayString("notifier.basicAuth.passwordFile", "Optional path to basic auth password file for -notifier.url")
|
||||
basicAuthUsername = flagutil.NewArray("notifier.basicAuth.username", "Optional basic auth username for -notifier.url")
|
||||
basicAuthPassword = flagutil.NewArray("notifier.basicAuth.password", "Optional basic auth password for -notifier.url")
|
||||
basicAuthPasswordFile = flagutil.NewArray("notifier.basicAuth.passwordFile", "Optional path to basic auth password file for -notifier.url")
|
||||
|
||||
bearerToken = flagutil.NewArrayString("notifier.bearerToken", "Optional bearer token for -notifier.url")
|
||||
bearerTokenFile = flagutil.NewArrayString("notifier.bearerTokenFile", "Optional path to bearer token file for -notifier.url")
|
||||
bearerToken = flagutil.NewArray("notifier.bearerToken", "Optional bearer token for -notifier.url")
|
||||
bearerTokenFile = flagutil.NewArray("notifier.bearerTokenFile", "Optional path to bearer token file for -notifier.url")
|
||||
|
||||
tlsInsecureSkipVerify = flagutil.NewArrayBool("notifier.tlsInsecureSkipVerify", "Whether to skip tls verification when connecting to -notifier.url")
|
||||
tlsCertFile = flagutil.NewArrayString("notifier.tlsCertFile", "Optional path to client-side TLS certificate file to use when connecting to -notifier.url")
|
||||
tlsKeyFile = flagutil.NewArrayString("notifier.tlsKeyFile", "Optional path to client-side TLS certificate key to use when connecting to -notifier.url")
|
||||
tlsCAFile = flagutil.NewArrayString("notifier.tlsCAFile", "Optional path to TLS CA file to use for verifying connections to -notifier.url. "+
|
||||
tlsCertFile = flagutil.NewArray("notifier.tlsCertFile", "Optional path to client-side TLS certificate file to use when connecting to -notifier.url")
|
||||
tlsKeyFile = flagutil.NewArray("notifier.tlsKeyFile", "Optional path to client-side TLS certificate key to use when connecting to -notifier.url")
|
||||
tlsCAFile = flagutil.NewArray("notifier.tlsCAFile", "Optional path to TLS CA file to use for verifying connections to -notifier.url. "+
|
||||
"By default system CA is used")
|
||||
tlsServerName = flagutil.NewArrayString("notifier.tlsServerName", "Optional TLS server name to use for connections to -notifier.url. "+
|
||||
tlsServerName = flagutil.NewArray("notifier.tlsServerName", "Optional TLS server name to use for connections to -notifier.url. "+
|
||||
"By default the server name from -notifier.url is used")
|
||||
|
||||
oauth2ClientID = flagutil.NewArrayString("notifier.oauth2.clientID", "Optional OAuth2 clientID to use for -notifier.url. "+
|
||||
oauth2ClientID = flagutil.NewArray("notifier.oauth2.clientID", "Optional OAuth2 clientID to use for -notifier.url. "+
|
||||
"If multiple args are set, then they are applied independently for the corresponding -notifier.url")
|
||||
oauth2ClientSecret = flagutil.NewArrayString("notifier.oauth2.clientSecret", "Optional OAuth2 clientSecret to use for -notifier.url. "+
|
||||
oauth2ClientSecret = flagutil.NewArray("notifier.oauth2.clientSecret", "Optional OAuth2 clientSecret to use for -notifier.url. "+
|
||||
"If multiple args are set, then they are applied independently for the corresponding -notifier.url")
|
||||
oauth2ClientSecretFile = flagutil.NewArrayString("notifier.oauth2.clientSecretFile", "Optional OAuth2 clientSecretFile to use for -notifier.url. "+
|
||||
oauth2ClientSecretFile = flagutil.NewArray("notifier.oauth2.clientSecretFile", "Optional OAuth2 clientSecretFile to use for -notifier.url. "+
|
||||
"If multiple args are set, then they are applied independently for the corresponding -notifier.url")
|
||||
oauth2TokenURL = flagutil.NewArrayString("notifier.oauth2.tokenUrl", "Optional OAuth2 tokenURL to use for -notifier.url. "+
|
||||
oauth2TokenURL = flagutil.NewArray("notifier.oauth2.tokenUrl", "Optional OAuth2 tokenURL to use for -notifier.url. "+
|
||||
"If multiple args are set, then they are applied independently for the corresponding -notifier.url")
|
||||
oauth2Scopes = flagutil.NewArrayString("notifier.oauth2.scopes", "Optional OAuth2 scopes to use for -notifier.url. Scopes must be delimited by ';'. "+
|
||||
oauth2Scopes = flagutil.NewArray("notifier.oauth2.scopes", "Optional OAuth2 scopes to use for -notifier.url. Scopes must be delimited by ';'. "+
|
||||
"If multiple args are set, then they are applied independently for the corresponding -notifier.url")
|
||||
)
|
||||
|
||||
@@ -160,7 +159,7 @@ func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
|
||||
// list of labels added during discovery.
|
||||
type Target struct {
|
||||
Notifier
|
||||
Labels *promutils.Labels
|
||||
Labels []prompbmarshal.Label
|
||||
}
|
||||
|
||||
// TargetType defines how the Target was discovered
|
||||
|
||||
16
app/vmalert/notifier/testdata/static.good.yaml
vendored
16
app/vmalert/notifier/testdata/static.good.yaml
vendored
@@ -1,21 +1,7 @@
|
||||
headers:
|
||||
- 'CustomHeader: foo'
|
||||
|
||||
static_configs:
|
||||
- targets:
|
||||
- localhost:9093
|
||||
- localhost:9095
|
||||
basic_auth:
|
||||
username: foo
|
||||
password: bar
|
||||
|
||||
- targets:
|
||||
- localhost:9096
|
||||
- localhost:9097
|
||||
basic_auth:
|
||||
username: foo
|
||||
password: baz
|
||||
|
||||
alert_relabel_configs:
|
||||
- target_label: "foo"
|
||||
replacement: "aaa"
|
||||
replacement: "aaa"
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
@@ -26,9 +27,19 @@ type RecordingRule struct {
|
||||
|
||||
q datasource.Querier
|
||||
|
||||
// state stores recent state changes
|
||||
// during evaluations
|
||||
state *ruleState
|
||||
// guard status fields
|
||||
mu sync.RWMutex
|
||||
// stores last moment of time Exec was called
|
||||
lastExecTime time.Time
|
||||
// stores the duration of the last Exec call
|
||||
lastExecDuration time.Duration
|
||||
// stores last error that happened in Exec func
|
||||
// resets on every successful Exec
|
||||
// may be used as Health state
|
||||
lastExecError error
|
||||
// stores the number of samples returned during
|
||||
// the last evaluation
|
||||
lastExecSamples int
|
||||
|
||||
metrics *recordingRuleMetrics
|
||||
}
|
||||
@@ -66,25 +77,21 @@ func newRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
|
||||
}),
|
||||
}
|
||||
|
||||
if cfg.UpdateEntriesLimit != nil {
|
||||
rr.state = newRuleState(*cfg.UpdateEntriesLimit)
|
||||
} else {
|
||||
rr.state = newRuleState(*ruleUpdateEntriesLimit)
|
||||
}
|
||||
|
||||
labels := fmt.Sprintf(`recording=%q, group=%q, id="%d"`, rr.Name, group.Name, rr.ID())
|
||||
rr.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_error{%s}`, labels),
|
||||
func() float64 {
|
||||
e := rr.state.getLast()
|
||||
if e.err == nil {
|
||||
rr.mu.RLock()
|
||||
defer rr.mu.RUnlock()
|
||||
if rr.lastExecError == nil {
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
})
|
||||
rr.metrics.samples = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_last_evaluation_samples{%s}`, labels),
|
||||
func() float64 {
|
||||
e := rr.state.getLast()
|
||||
return float64(e.samples)
|
||||
rr.mu.RLock()
|
||||
defer rr.mu.RUnlock()
|
||||
return float64(rr.lastExecSamples)
|
||||
})
|
||||
return rr
|
||||
}
|
||||
@@ -119,29 +126,21 @@ func (rr *RecordingRule) ExecRange(ctx context.Context, start, end time.Time) ([
|
||||
|
||||
// Exec executes RecordingRule expression via the given Querier.
|
||||
func (rr *RecordingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
|
||||
start := time.Now()
|
||||
qMetrics, req, err := rr.q.Query(ctx, rr.Expr, ts)
|
||||
curState := ruleStateEntry{
|
||||
time: start,
|
||||
at: ts,
|
||||
duration: time.Since(start),
|
||||
samples: len(qMetrics),
|
||||
curl: requestToCurl(req),
|
||||
}
|
||||
|
||||
defer func() {
|
||||
rr.state.add(curState)
|
||||
}()
|
||||
qMetrics, err := rr.q.Query(ctx, rr.Expr, ts)
|
||||
rr.mu.Lock()
|
||||
defer rr.mu.Unlock()
|
||||
|
||||
rr.lastExecTime = ts
|
||||
rr.lastExecDuration = time.Since(ts)
|
||||
rr.lastExecError = err
|
||||
rr.lastExecSamples = len(qMetrics)
|
||||
if err != nil {
|
||||
curState.err = fmt.Errorf("failed to execute query %q: %w", rr.Expr, err)
|
||||
return nil, curState.err
|
||||
return nil, fmt.Errorf("failed to execute query %q: %w", rr.Expr, err)
|
||||
}
|
||||
|
||||
numSeries := len(qMetrics)
|
||||
if limit > 0 && numSeries > limit {
|
||||
curState.err = fmt.Errorf("exec exceeded limit of %d with %d series", limit, numSeries)
|
||||
return nil, curState.err
|
||||
return nil, fmt.Errorf("exec exceeded limit of %d with %d series", limit, numSeries)
|
||||
}
|
||||
|
||||
duplicates := make(map[string]struct{}, len(qMetrics))
|
||||
@@ -150,8 +149,8 @@ func (rr *RecordingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]p
|
||||
ts := rr.toTimeSeries(r)
|
||||
key := stringifyLabels(ts)
|
||||
if _, ok := duplicates[key]; ok {
|
||||
curState.err = fmt.Errorf("original metric %v; resulting labels %q: %w", r, key, errDuplicate)
|
||||
return nil, curState.err
|
||||
rr.lastExecError = errDuplicate
|
||||
return nil, fmt.Errorf("original metric %v; resulting labels %q: %w", r, key, errDuplicate)
|
||||
}
|
||||
duplicates[key] = struct{}{}
|
||||
tss = append(tss, ts)
|
||||
@@ -206,26 +205,23 @@ func (rr *RecordingRule) UpdateWith(r Rule) error {
|
||||
// ToAPI returns Rule's representation in form
|
||||
// of APIRule
|
||||
func (rr *RecordingRule) ToAPI() APIRule {
|
||||
lastState := rr.state.getLast()
|
||||
r := APIRule{
|
||||
Type: "recording",
|
||||
DatasourceType: rr.Type.String(),
|
||||
Name: rr.Name,
|
||||
Query: rr.Expr,
|
||||
Labels: rr.Labels,
|
||||
LastEvaluation: lastState.time,
|
||||
EvaluationTime: lastState.duration.Seconds(),
|
||||
LastEvaluation: rr.lastExecTime,
|
||||
EvaluationTime: rr.lastExecDuration.Seconds(),
|
||||
Health: "ok",
|
||||
LastSamples: lastState.samples,
|
||||
MaxUpdates: rr.state.size(),
|
||||
Updates: rr.state.getAll(),
|
||||
|
||||
LastSamples: rr.lastExecSamples,
|
||||
// encode as strings to avoid rounding
|
||||
ID: fmt.Sprintf("%d", rr.ID()),
|
||||
GroupID: fmt.Sprintf("%d", rr.GroupID),
|
||||
}
|
||||
if lastState.err != nil {
|
||||
r.LastError = lastState.err.Error()
|
||||
|
||||
if rr.lastExecError != nil {
|
||||
r.LastError = rr.lastExecError.Error()
|
||||
r.Health = "err"
|
||||
}
|
||||
return r
|
||||
|
||||
@@ -52,11 +52,9 @@ func TestRecordingRule_Exec(t *testing.T) {
|
||||
},
|
||||
},
|
||||
{
|
||||
&RecordingRule{
|
||||
Name: "job:foo",
|
||||
Labels: map[string]string{
|
||||
"source": "test",
|
||||
}},
|
||||
&RecordingRule{Name: "job:foo", Labels: map[string]string{
|
||||
"source": "test",
|
||||
}},
|
||||
[]datasource.Metric{
|
||||
metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "foo"),
|
||||
metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar")},
|
||||
@@ -79,7 +77,6 @@ func TestRecordingRule_Exec(t *testing.T) {
|
||||
fq := &fakeQuerier{}
|
||||
fq.add(tc.metrics...)
|
||||
tc.rule.q = fq
|
||||
tc.rule.state = newRuleState(10)
|
||||
tss, err := tc.rule.Exec(context.TODO(), time.Now(), 0)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected Exec err: %s", err)
|
||||
@@ -198,7 +195,7 @@ func TestRecordingRuleLimit(t *testing.T) {
|
||||
metricWithValuesAndLabels(t, []float64{2, 3}, "__name__", "bar", "job", "bar"),
|
||||
metricWithValuesAndLabels(t, []float64{4, 5, 6}, "__name__", "baz", "job", "baz"),
|
||||
}
|
||||
rule := &RecordingRule{Name: "job:foo", state: newRuleState(10), Labels: map[string]string{
|
||||
rule := &RecordingRule{Name: "job:foo", Labels: map[string]string{
|
||||
"source": "test_limit",
|
||||
}}
|
||||
var err error
|
||||
@@ -214,13 +211,9 @@ func TestRecordingRuleLimit(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestRecordingRule_ExecNegative(t *testing.T) {
|
||||
rr := &RecordingRule{
|
||||
Name: "job:foo",
|
||||
state: newRuleState(10),
|
||||
Labels: map[string]string{
|
||||
"job": "test",
|
||||
},
|
||||
}
|
||||
rr := &RecordingRule{Name: "job:foo", Labels: map[string]string{
|
||||
"job": "test",
|
||||
}}
|
||||
|
||||
fq := &fakeQuerier{}
|
||||
expErr := "connection reset by peer"
|
||||
|
||||
@@ -77,12 +77,13 @@ func Init(ctx context.Context) (*Client, error) {
|
||||
}
|
||||
|
||||
return NewClient(ctx, Config{
|
||||
Addr: *addr,
|
||||
AuthCfg: authCfg,
|
||||
Concurrency: *concurrency,
|
||||
MaxQueueSize: *maxQueueSize,
|
||||
MaxBatchSize: *maxBatchSize,
|
||||
FlushInterval: *flushInterval,
|
||||
Transport: t,
|
||||
Addr: *addr,
|
||||
AuthCfg: authCfg,
|
||||
Concurrency: *concurrency,
|
||||
MaxQueueSize: *maxQueueSize,
|
||||
MaxBatchSize: *maxBatchSize,
|
||||
FlushInterval: *flushInterval,
|
||||
DisablePathAppend: *disablePathAppend,
|
||||
Transport: t,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -22,7 +22,6 @@ import (
|
||||
|
||||
var (
|
||||
disablePathAppend = flag.Bool("remoteWrite.disablePathAppend", false, "Whether to disable automatic appending of '/api/v1/write' path to the configured -remoteWrite.url.")
|
||||
sendTimeout = flag.Duration("remoteWrite.sendTimeout", 30*time.Second, "Timeout for sending data to the configured -remoteWrite.url.")
|
||||
)
|
||||
|
||||
// Client is an asynchronous HTTP client for writing
|
||||
@@ -58,8 +57,13 @@ type Config struct {
|
||||
MaxQueueSize int
|
||||
// FlushInterval defines time interval for flushing batches
|
||||
FlushInterval time.Duration
|
||||
// WriteTimeout defines timeout for HTTP write request
|
||||
// to remote storage
|
||||
WriteTimeout time.Duration
|
||||
// Transport will be used by the underlying http.Client
|
||||
Transport *http.Transport
|
||||
// DisablePathAppend can be used to not automatically append '/api/v1/write' to the remote write url
|
||||
DisablePathAppend bool
|
||||
}
|
||||
|
||||
const (
|
||||
@@ -85,6 +89,9 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
|
||||
if cfg.FlushInterval == 0 {
|
||||
cfg.FlushInterval = defaultFlushInterval
|
||||
}
|
||||
if cfg.WriteTimeout == 0 {
|
||||
cfg.WriteTimeout = defaultWriteTimeout
|
||||
}
|
||||
if cfg.Transport == nil {
|
||||
cfg.Transport = http.DefaultTransport.(*http.Transport).Clone()
|
||||
}
|
||||
@@ -94,7 +101,7 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
|
||||
}
|
||||
c := &Client{
|
||||
c: &http.Client{
|
||||
Timeout: *sendTimeout,
|
||||
Timeout: cfg.WriteTimeout,
|
||||
Transport: cfg.Transport,
|
||||
},
|
||||
addr: strings.TrimSuffix(cfg.Addr, "/"),
|
||||
@@ -211,7 +218,7 @@ func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
|
||||
return
|
||||
}
|
||||
|
||||
logger.Warnf("attempt %d to send request failed: %s", i+1, err)
|
||||
logger.Errorf("attempt %d to send request failed: %s", i+1, err)
|
||||
// sleeping to avoid remote db hammering
|
||||
time.Sleep(time.Second)
|
||||
continue
|
||||
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/cheggaaa/pb/v3"
|
||||
"github.com/dmitryk-dk/pb/v3"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
|
||||
@@ -3,7 +3,6 @@ package main
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
@@ -32,95 +31,3 @@ type Rule interface {
|
||||
}
|
||||
|
||||
var errDuplicate = errors.New("result contains metrics with the same labelset after applying rule labels")
|
||||
|
||||
type ruleState struct {
|
||||
sync.RWMutex
|
||||
entries []ruleStateEntry
|
||||
cur int
|
||||
// disabled defines whether ruleState tracks ruleStateEntry
|
||||
disabled bool
|
||||
}
|
||||
|
||||
type ruleStateEntry struct {
|
||||
// stores last moment of time rule.Exec was called
|
||||
time time.Time
|
||||
// stores the timesteamp with which rule.Exec was called
|
||||
at time.Time
|
||||
// stores the duration of the last rule.Exec call
|
||||
duration time.Duration
|
||||
// stores last error that happened in Exec func
|
||||
// resets on every successful Exec
|
||||
// may be used as Health ruleState
|
||||
err error
|
||||
// stores the number of samples returned during
|
||||
// the last evaluation
|
||||
samples int
|
||||
// stores the curl command reflecting the HTTP request used during rule.Exec
|
||||
curl string
|
||||
}
|
||||
|
||||
func newRuleState(size int) *ruleState {
|
||||
if size < 1 {
|
||||
return &ruleState{disabled: true}
|
||||
}
|
||||
return &ruleState{
|
||||
entries: make([]ruleStateEntry, size),
|
||||
}
|
||||
}
|
||||
|
||||
func (s *ruleState) getLast() ruleStateEntry {
|
||||
if s.disabled {
|
||||
return ruleStateEntry{}
|
||||
}
|
||||
|
||||
s.RLock()
|
||||
defer s.RUnlock()
|
||||
return s.entries[s.cur]
|
||||
}
|
||||
|
||||
func (s *ruleState) size() int {
|
||||
s.RLock()
|
||||
defer s.RUnlock()
|
||||
return len(s.entries)
|
||||
}
|
||||
|
||||
func (s *ruleState) getAll() []ruleStateEntry {
|
||||
if s.disabled {
|
||||
return nil
|
||||
}
|
||||
|
||||
entries := make([]ruleStateEntry, 0)
|
||||
|
||||
s.RLock()
|
||||
defer s.RUnlock()
|
||||
|
||||
cur := s.cur
|
||||
for {
|
||||
e := s.entries[cur]
|
||||
if !e.time.IsZero() || !e.at.IsZero() {
|
||||
entries = append(entries, e)
|
||||
}
|
||||
cur--
|
||||
if cur < 0 {
|
||||
cur = cap(s.entries) - 1
|
||||
}
|
||||
if cur == s.cur {
|
||||
return entries
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *ruleState) add(e ruleStateEntry) {
|
||||
if s.disabled {
|
||||
return
|
||||
}
|
||||
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
|
||||
s.cur++
|
||||
if s.cur > cap(s.entries)-1 {
|
||||
s.cur = 0
|
||||
}
|
||||
s.entries[s.cur] = e
|
||||
}
|
||||
|
||||
@@ -1,100 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestRule_stateDisabled(t *testing.T) {
|
||||
state := newRuleState(-1)
|
||||
e := state.getLast()
|
||||
if !e.at.IsZero() {
|
||||
t.Fatalf("expected entry to be zero")
|
||||
}
|
||||
|
||||
state.add(ruleStateEntry{at: time.Now()})
|
||||
if !e.at.IsZero() {
|
||||
t.Fatalf("expected entry to be zero")
|
||||
}
|
||||
|
||||
if len(state.getAll()) != 0 {
|
||||
t.Fatalf("expected for state to have %d entries; got %d",
|
||||
0, len(state.getAll()),
|
||||
)
|
||||
}
|
||||
}
|
||||
func TestRule_state(t *testing.T) {
|
||||
stateEntriesN := 20
|
||||
state := newRuleState(stateEntriesN)
|
||||
e := state.getLast()
|
||||
if !e.at.IsZero() {
|
||||
t.Fatalf("expected entry to be zero")
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
state.add(ruleStateEntry{at: now})
|
||||
|
||||
e = state.getLast()
|
||||
if e.at != now {
|
||||
t.Fatalf("expected entry at %v to be equal to %v",
|
||||
e.at, now)
|
||||
}
|
||||
|
||||
time.Sleep(time.Millisecond)
|
||||
now2 := time.Now()
|
||||
state.add(ruleStateEntry{at: now2})
|
||||
|
||||
e = state.getLast()
|
||||
if e.at != now2 {
|
||||
t.Fatalf("expected entry at %v to be equal to %v",
|
||||
e.at, now2)
|
||||
}
|
||||
|
||||
if len(state.getAll()) != 2 {
|
||||
t.Fatalf("expected for state to have 2 entries only; got %d",
|
||||
len(state.getAll()),
|
||||
)
|
||||
}
|
||||
|
||||
var last time.Time
|
||||
for i := 0; i < stateEntriesN*2; i++ {
|
||||
last = time.Now()
|
||||
state.add(ruleStateEntry{at: last})
|
||||
}
|
||||
|
||||
e = state.getLast()
|
||||
if e.at != last {
|
||||
t.Fatalf("expected entry at %v to be equal to %v",
|
||||
e.at, last)
|
||||
}
|
||||
|
||||
if len(state.getAll()) != stateEntriesN {
|
||||
t.Fatalf("expected for state to have %d entries only; got %d",
|
||||
stateEntriesN, len(state.getAll()),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRule_stateConcurrent supposed to test concurrent
|
||||
// execution of state updates.
|
||||
// Should be executed with -race flag
|
||||
func TestRule_stateConcurrent(t *testing.T) {
|
||||
state := newRuleState(20)
|
||||
|
||||
const workers = 50
|
||||
const iterations = 100
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(workers)
|
||||
for i := 0; i < workers; i++ {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for i := 0; i < iterations; i++ {
|
||||
state.add(ruleStateEntry{at: time.Now()})
|
||||
state.getAll()
|
||||
state.getLast()
|
||||
}
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
@@ -1,15 +0,0 @@
|
||||
{% stripspace %}
|
||||
|
||||
{% func quotesEscape(s string) %}
|
||||
{%j= s %}
|
||||
{% endfunc %}
|
||||
|
||||
{% func jsonEscape(s string) %}
|
||||
{%q= s %}
|
||||
{% endfunc %}
|
||||
|
||||
{% func htmlEscape(s string) %}
|
||||
{%s s %}
|
||||
{% endfunc %}
|
||||
|
||||
{% endstripspace %}
|
||||
@@ -1,117 +0,0 @@
|
||||
// Code generated by qtc from "funcs.qtpl". DO NOT EDIT.
|
||||
// See https://github.com/valyala/quicktemplate for details.
|
||||
|
||||
//line app/vmalert/templates/funcs.qtpl:3
|
||||
package templates
|
||||
|
||||
//line app/vmalert/templates/funcs.qtpl:3
|
||||
import (
|
||||
qtio422016 "io"
|
||||
|
||||
qt422016 "github.com/valyala/quicktemplate"
|
||||
)
|
||||
|
||||
//line app/vmalert/templates/funcs.qtpl:3
|
||||
var (
|
||||
_ = qtio422016.Copy
|
||||
_ = qt422016.AcquireByteBuffer
|
||||
)
|
||||
|
||||
//line app/vmalert/templates/funcs.qtpl:3
|
||||
func streamquotesEscape(qw422016 *qt422016.Writer, s string) {
|
||||
//line app/vmalert/templates/funcs.qtpl:4
|
||||
qw422016.N().J(s)
|
||||
//line app/vmalert/templates/funcs.qtpl:5
|
||||
}
|
||||
|
||||
//line app/vmalert/templates/funcs.qtpl:5
|
||||
func writequotesEscape(qq422016 qtio422016.Writer, s string) {
|
||||
//line app/vmalert/templates/funcs.qtpl:5
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmalert/templates/funcs.qtpl:5
|
||||
streamquotesEscape(qw422016, s)
|
||||
//line app/vmalert/templates/funcs.qtpl:5
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmalert/templates/funcs.qtpl:5
|
||||
}
|
||||
|
||||
//line app/vmalert/templates/funcs.qtpl:5
|
||||
func quotesEscape(s string) string {
|
||||
//line app/vmalert/templates/funcs.qtpl:5
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmalert/templates/funcs.qtpl:5
|
||||
writequotesEscape(qb422016, s)
|
||||
//line app/vmalert/templates/funcs.qtpl:5
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmalert/templates/funcs.qtpl:5
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vmalert/templates/funcs.qtpl:5
|
||||
return qs422016
|
||||
//line app/vmalert/templates/funcs.qtpl:5
|
||||
}
|
||||
|
||||
//line app/vmalert/templates/funcs.qtpl:7
|
||||
func streamjsonEscape(qw422016 *qt422016.Writer, s string) {
|
||||
//line app/vmalert/templates/funcs.qtpl:8
|
||||
qw422016.N().Q(s)
|
||||
//line app/vmalert/templates/funcs.qtpl:9
|
||||
}
|
||||
|
||||
//line app/vmalert/templates/funcs.qtpl:9
|
||||
func writejsonEscape(qq422016 qtio422016.Writer, s string) {
|
||||
//line app/vmalert/templates/funcs.qtpl:9
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmalert/templates/funcs.qtpl:9
|
||||
streamjsonEscape(qw422016, s)
|
||||
//line app/vmalert/templates/funcs.qtpl:9
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmalert/templates/funcs.qtpl:9
|
||||
}
|
||||
|
||||
//line app/vmalert/templates/funcs.qtpl:9
|
||||
func jsonEscape(s string) string {
|
||||
//line app/vmalert/templates/funcs.qtpl:9
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmalert/templates/funcs.qtpl:9
|
||||
writejsonEscape(qb422016, s)
|
||||
//line app/vmalert/templates/funcs.qtpl:9
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmalert/templates/funcs.qtpl:9
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vmalert/templates/funcs.qtpl:9
|
||||
return qs422016
|
||||
//line app/vmalert/templates/funcs.qtpl:9
|
||||
}
|
||||
|
||||
//line app/vmalert/templates/funcs.qtpl:11
|
||||
func streamhtmlEscape(qw422016 *qt422016.Writer, s string) {
|
||||
//line app/vmalert/templates/funcs.qtpl:12
|
||||
qw422016.E().S(s)
|
||||
//line app/vmalert/templates/funcs.qtpl:13
|
||||
}
|
||||
|
||||
//line app/vmalert/templates/funcs.qtpl:13
|
||||
func writehtmlEscape(qq422016 qtio422016.Writer, s string) {
|
||||
//line app/vmalert/templates/funcs.qtpl:13
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmalert/templates/funcs.qtpl:13
|
||||
streamhtmlEscape(qw422016, s)
|
||||
//line app/vmalert/templates/funcs.qtpl:13
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmalert/templates/funcs.qtpl:13
|
||||
}
|
||||
|
||||
//line app/vmalert/templates/funcs.qtpl:13
|
||||
func htmlEscape(s string) string {
|
||||
//line app/vmalert/templates/funcs.qtpl:13
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmalert/templates/funcs.qtpl:13
|
||||
writehtmlEscape(qb422016, s)
|
||||
//line app/vmalert/templates/funcs.qtpl:13
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmalert/templates/funcs.qtpl:13
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vmalert/templates/funcs.qtpl:13
|
||||
return qs422016
|
||||
//line app/vmalert/templates/funcs.qtpl:13
|
||||
}
|
||||
@@ -27,11 +27,11 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
textTpl "text/template"
|
||||
"time"
|
||||
|
||||
textTpl "text/template"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/formatutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
)
|
||||
|
||||
@@ -207,10 +207,23 @@ func FuncsWithExternalURL(externalURL *url.URL) textTpl.FuncMap {
|
||||
// templateFuncs initiates template helper functions
|
||||
func templateFuncs() textTpl.FuncMap {
|
||||
// See https://prometheus.io/docs/prometheus/latest/configuration/template_reference/
|
||||
// and https://github.com/prometheus/prometheus/blob/fa6e05903fd3ce52e374a6e1bf4eb98c9f1f45a7/template/template.go#L150
|
||||
return textTpl.FuncMap{
|
||||
/* Strings */
|
||||
|
||||
// reReplaceAll ReplaceAllString returns a copy of src, replacing matches of the Regexp with
|
||||
// the replacement string repl. Inside repl, $ signs are interpreted as in Expand,
|
||||
// so for instance $1 represents the text of the first submatch.
|
||||
// alias for https://golang.org/pkg/regexp/#Regexp.ReplaceAllString
|
||||
"reReplaceAll": func(pattern, repl, text string) string {
|
||||
re := regexp.MustCompile(pattern)
|
||||
return re.ReplaceAllString(text, repl)
|
||||
},
|
||||
|
||||
// match reports whether the string s
|
||||
// contains any match of the regular expression pattern.
|
||||
// alias for https://golang.org/pkg/regexp/#MatchString
|
||||
"match": regexp.MatchString,
|
||||
|
||||
// title returns a copy of the string s with all Unicode letters
|
||||
// that begin words mapped to their Unicode title case.
|
||||
// alias for https://golang.org/pkg/strings/#Title
|
||||
@@ -224,31 +237,6 @@ func templateFuncs() textTpl.FuncMap {
|
||||
// alias for https://golang.org/pkg/strings/#ToLower
|
||||
"toLower": strings.ToLower,
|
||||
|
||||
// crlfEscape replaces '\n' and '\r' chars with `\\n` and `\\r`.
|
||||
// This funcion is deprectated.
|
||||
//
|
||||
// It is better to use quotesEscape, jsonEscape, queryEscape or pathEscape instead -
|
||||
// these functions properly escape `\n` and `\r` chars according to their purpose.
|
||||
"crlfEscape": func(q string) string {
|
||||
q = strings.Replace(q, "\n", `\n`, -1)
|
||||
return strings.Replace(q, "\r", `\r`, -1)
|
||||
},
|
||||
|
||||
// quotesEscape escapes the string, so it can be safely put inside JSON string.
|
||||
//
|
||||
// See also jsonEscape.
|
||||
"quotesEscape": quotesEscape,
|
||||
|
||||
// jsonEscape converts the string to properly encoded JSON string.
|
||||
//
|
||||
// See also quotesEscape.
|
||||
"jsonEscape": jsonEscape,
|
||||
|
||||
// htmlEscape applies html-escaping to q, so it can be safely embedded as plaintext into html.
|
||||
//
|
||||
// See also safeHtml.
|
||||
"htmlEscape": htmlEscape,
|
||||
|
||||
// stripPort splits string into host and port, then returns only host.
|
||||
"stripPort": func(hostPort string) string {
|
||||
host, _, err := net.SplitHostPort(hostPort)
|
||||
@@ -258,37 +246,6 @@ func templateFuncs() textTpl.FuncMap {
|
||||
return host
|
||||
},
|
||||
|
||||
// stripDomain removes the domain part of a FQDN. Leaves port untouched.
|
||||
"stripDomain": func(hostPort string) string {
|
||||
host, port, err := net.SplitHostPort(hostPort)
|
||||
if err != nil {
|
||||
host = hostPort
|
||||
}
|
||||
ip := net.ParseIP(host)
|
||||
if ip != nil {
|
||||
return hostPort
|
||||
}
|
||||
host = strings.Split(host, ".")[0]
|
||||
if port != "" {
|
||||
return net.JoinHostPort(host, port)
|
||||
}
|
||||
return host
|
||||
},
|
||||
|
||||
// match reports whether the string s
|
||||
// contains any match of the regular expression pattern.
|
||||
// alias for https://golang.org/pkg/regexp/#MatchString
|
||||
"match": regexp.MatchString,
|
||||
|
||||
// reReplaceAll ReplaceAllString returns a copy of src, replacing matches of the Regexp with
|
||||
// the replacement string repl. Inside repl, $ signs are interpreted as in Expand,
|
||||
// so for instance $1 represents the text of the first submatch.
|
||||
// alias for https://golang.org/pkg/regexp/#Regexp.ReplaceAllString
|
||||
"reReplaceAll": func(pattern, repl, text string) string {
|
||||
re := regexp.MustCompile(pattern)
|
||||
return re.ReplaceAllString(text, repl)
|
||||
},
|
||||
|
||||
// parseDuration parses a duration string such as "1h" into the number of seconds it represents
|
||||
"parseDuration": func(s string) (float64, error) {
|
||||
d, err := promutils.ParseDuration(s)
|
||||
@@ -350,10 +307,18 @@ func templateFuncs() textTpl.FuncMap {
|
||||
if math.Abs(v) <= 1 || math.IsNaN(v) || math.IsInf(v, 0) {
|
||||
return fmt.Sprintf("%.4g", v), nil
|
||||
}
|
||||
return formatutil.HumanizeBytes(v), nil
|
||||
prefix := ""
|
||||
for _, p := range []string{"ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"} {
|
||||
if math.Abs(v) < 1024 {
|
||||
break
|
||||
}
|
||||
prefix = p
|
||||
v /= 1024
|
||||
}
|
||||
return fmt.Sprintf("%.4g%s", v, prefix), nil
|
||||
},
|
||||
|
||||
// humanizeDuration converts given seconds to a human-readable duration
|
||||
// humanizeDuration converts given seconds to a human readable duration
|
||||
"humanizeDuration": func(i interface{}) (string, error) {
|
||||
v, err := toFloat64(i)
|
||||
if err != nil {
|
||||
@@ -456,15 +421,31 @@ func templateFuncs() textTpl.FuncMap {
|
||||
return ""
|
||||
},
|
||||
|
||||
// pathEscape escapes the string so it can be safely placed inside a URL path segment.
|
||||
//
|
||||
// See also queryEscape.
|
||||
"pathEscape": url.PathEscape,
|
||||
// pathEscape escapes the string so it can be safely placed inside a URL path segment,
|
||||
// replacing special characters (including /) with %XX sequences as needed.
|
||||
// alias for https://golang.org/pkg/net/url/#PathEscape
|
||||
"pathEscape": func(u string) string {
|
||||
return url.PathEscape(u)
|
||||
},
|
||||
|
||||
// queryEscape escapes the string so it can be safely placed inside a query arg in URL.
|
||||
//
|
||||
// See also queryEscape.
|
||||
"queryEscape": url.QueryEscape,
|
||||
// queryEscape escapes the string so it can be safely placed
|
||||
// inside a URL query.
|
||||
// alias for https://golang.org/pkg/net/url/#QueryEscape
|
||||
"queryEscape": func(q string) string {
|
||||
return url.QueryEscape(q)
|
||||
},
|
||||
|
||||
// crlfEscape replaces new line chars to skip URL encoding.
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/890
|
||||
"crlfEscape": func(q string) string {
|
||||
q = strings.Replace(q, "\n", `\n`, -1)
|
||||
return strings.Replace(q, "\r", `\r`, -1)
|
||||
},
|
||||
|
||||
// quotesEscape escapes quote char
|
||||
"quotesEscape": func(q string) string {
|
||||
return strings.Replace(q, `"`, `\"`, -1)
|
||||
},
|
||||
|
||||
// query executes the MetricsQL/PromQL query against
|
||||
// configured `datasource.url` address.
|
||||
@@ -496,17 +477,6 @@ func templateFuncs() textTpl.FuncMap {
|
||||
return m.Labels[label]
|
||||
},
|
||||
|
||||
// value returns the value of the given metric.
|
||||
// usually used alongside with `query` template function.
|
||||
"value": func(m metric) float64 {
|
||||
return m.Value
|
||||
},
|
||||
|
||||
// strvalue returns metric name.
|
||||
"strvalue": func(m metric) string {
|
||||
return m.Labels["__name__"]
|
||||
},
|
||||
|
||||
// sortByLabel sorts the given metrics by provided label key
|
||||
"sortByLabel": func(label string, metrics []metric) []metric {
|
||||
sort.SliceStable(metrics, func(i, j int) bool {
|
||||
@@ -515,6 +485,12 @@ func templateFuncs() textTpl.FuncMap {
|
||||
return metrics
|
||||
},
|
||||
|
||||
// value returns the value of the given metric.
|
||||
// usually used alongside with `query` template function.
|
||||
"value": func(m metric) float64 {
|
||||
return m.Value
|
||||
},
|
||||
|
||||
/* Helpers */
|
||||
|
||||
// Converts a list of objects to a map with keys arg0, arg1 etc.
|
||||
@@ -528,8 +504,6 @@ func templateFuncs() textTpl.FuncMap {
|
||||
},
|
||||
|
||||
// safeHtml marks string as HTML not requiring auto-escaping.
|
||||
//
|
||||
// See also htmlEscape.
|
||||
"safeHtml": func(text string) htmlTpl.HTML {
|
||||
return htmlTpl.HTML(text)
|
||||
},
|
||||
|
||||
@@ -1,83 +1,11 @@
|
||||
package templates
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strings"
|
||||
"testing"
|
||||
textTpl "text/template"
|
||||
)
|
||||
|
||||
func TestTemplateFuncs(t *testing.T) {
|
||||
funcs := templateFuncs()
|
||||
f := func(funcName, s, resultExpected string) {
|
||||
t.Helper()
|
||||
v := funcs[funcName]
|
||||
fLocal := v.(func(s string) string)
|
||||
result := fLocal(s)
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected result for %s(%q); got\n%s\nwant\n%s", funcName, s, result, resultExpected)
|
||||
}
|
||||
}
|
||||
f("title", "foo bar", "Foo Bar")
|
||||
f("toUpper", "foo", "FOO")
|
||||
f("toLower", "FOO", "foo")
|
||||
f("pathEscape", "foo/bar\n+baz", "foo%2Fbar%0A+baz")
|
||||
f("queryEscape", "foo+bar\n+baz", "foo%2Bbar%0A%2Bbaz")
|
||||
f("jsonEscape", `foo{bar="baz"}`+"\n + 1", `"foo{bar=\"baz\"}\n + 1"`)
|
||||
f("quotesEscape", `foo{bar="baz"}`+"\n + 1", `foo{bar=\"baz\"}\n + 1`)
|
||||
f("htmlEscape", "foo < 10\nabc", "foo < 10\nabc")
|
||||
f("crlfEscape", "foo\nbar\rx", `foo\nbar\rx`)
|
||||
f("stripPort", "foo", "foo")
|
||||
f("stripPort", "foo:1234", "foo")
|
||||
f("stripDomain", "foo.bar.baz", "foo")
|
||||
f("stripDomain", "foo.bar:123", "foo:123")
|
||||
|
||||
// check "match" func
|
||||
matchFunc := funcs["match"].(func(pattern, s string) (bool, error))
|
||||
if _, err := matchFunc("invalid[regexp", "abc"); err == nil {
|
||||
t.Fatalf("expecting non-nil error on invalid regexp")
|
||||
}
|
||||
ok, err := matchFunc("abc", "def")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error")
|
||||
}
|
||||
if ok {
|
||||
t.Fatalf("unexpected match")
|
||||
}
|
||||
ok, err = matchFunc("a.+b", "acsdb")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error")
|
||||
}
|
||||
if !ok {
|
||||
t.Fatalf("unexpected mismatch")
|
||||
}
|
||||
|
||||
formatting := func(funcName string, p interface{}, resultExpected string) {
|
||||
t.Helper()
|
||||
v := funcs[funcName]
|
||||
fLocal := v.(func(s interface{}) (string, error))
|
||||
result, err := fLocal(p)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error for %s(%f): %s", funcName, p, err)
|
||||
}
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected result for %s(%f); got\n%s\nwant\n%s", funcName, p, result, resultExpected)
|
||||
}
|
||||
}
|
||||
formatting("humanize1024", float64(0), "0")
|
||||
formatting("humanize1024", math.Inf(0), "+Inf")
|
||||
formatting("humanize1024", math.NaN(), "NaN")
|
||||
formatting("humanize1024", float64(127087), "124.1ki")
|
||||
formatting("humanize1024", float64(130137088), "124.1Mi")
|
||||
formatting("humanize1024", float64(133260378112), "124.1Gi")
|
||||
formatting("humanize1024", float64(136458627186688), "124.1Ti")
|
||||
formatting("humanize1024", float64(139733634239168512), "124.1Pi")
|
||||
formatting("humanize1024", float64(143087241460908556288), "124.1Ei")
|
||||
formatting("humanize1024", float64(146521335255970361638912), "124.1Zi")
|
||||
formatting("humanize1024", float64(150037847302113650318245888), "124.1Yi")
|
||||
formatting("humanize1024", float64(153638755637364377925883789312), "1.271e+05Yi")
|
||||
}
|
||||
|
||||
func mkTemplate(current, replacement interface{}) textTemplate {
|
||||
tmpl := textTemplate{}
|
||||
if current != nil {
|
||||
|
||||
@@ -59,15 +59,6 @@
|
||||
background-color: rgba(0,0,0,.5);
|
||||
-webkit-box-shadow: 0 0 1px rgba(255,255,255,.5);
|
||||
}
|
||||
textarea.curl-area{
|
||||
width: 100%;
|
||||
line-height: 1;
|
||||
font-size: 12px;
|
||||
border: none;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
overflow: scroll;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
@@ -101,152 +101,143 @@ func StreamHeader(qw422016 *qt422016.Writer, r *http.Request, navItems []NavItem
|
||||
background-color: rgba(0,0,0,.5);
|
||||
-webkit-box-shadow: 0 0 1px rgba(255,255,255,.5);
|
||||
}
|
||||
textarea.curl-area{
|
||||
width: 100%;
|
||||
line-height: 1;
|
||||
font-size: 12px;
|
||||
border: none;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
overflow: scroll;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
`)
|
||||
//line app/vmalert/tpl/header.qtpl:74
|
||||
//line app/vmalert/tpl/header.qtpl:65
|
||||
streamprintNavItems(qw422016, r, title, navItems)
|
||||
//line app/vmalert/tpl/header.qtpl:74
|
||||
//line app/vmalert/tpl/header.qtpl:65
|
||||
qw422016.N().S(`
|
||||
<main class="px-2">
|
||||
`)
|
||||
//line app/vmalert/tpl/header.qtpl:76
|
||||
//line app/vmalert/tpl/header.qtpl:67
|
||||
}
|
||||
|
||||
//line app/vmalert/tpl/header.qtpl:76
|
||||
//line app/vmalert/tpl/header.qtpl:67
|
||||
func WriteHeader(qq422016 qtio422016.Writer, r *http.Request, navItems []NavItem, title string) {
|
||||
//line app/vmalert/tpl/header.qtpl:76
|
||||
//line app/vmalert/tpl/header.qtpl:67
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmalert/tpl/header.qtpl:76
|
||||
//line app/vmalert/tpl/header.qtpl:67
|
||||
StreamHeader(qw422016, r, navItems, title)
|
||||
//line app/vmalert/tpl/header.qtpl:76
|
||||
//line app/vmalert/tpl/header.qtpl:67
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmalert/tpl/header.qtpl:76
|
||||
//line app/vmalert/tpl/header.qtpl:67
|
||||
}
|
||||
|
||||
//line app/vmalert/tpl/header.qtpl:76
|
||||
//line app/vmalert/tpl/header.qtpl:67
|
||||
func Header(r *http.Request, navItems []NavItem, title string) string {
|
||||
//line app/vmalert/tpl/header.qtpl:76
|
||||
//line app/vmalert/tpl/header.qtpl:67
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmalert/tpl/header.qtpl:76
|
||||
//line app/vmalert/tpl/header.qtpl:67
|
||||
WriteHeader(qb422016, r, navItems, title)
|
||||
//line app/vmalert/tpl/header.qtpl:76
|
||||
//line app/vmalert/tpl/header.qtpl:67
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmalert/tpl/header.qtpl:76
|
||||
//line app/vmalert/tpl/header.qtpl:67
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vmalert/tpl/header.qtpl:76
|
||||
//line app/vmalert/tpl/header.qtpl:67
|
||||
return qs422016
|
||||
//line app/vmalert/tpl/header.qtpl:76
|
||||
//line app/vmalert/tpl/header.qtpl:67
|
||||
}
|
||||
|
||||
//line app/vmalert/tpl/header.qtpl:80
|
||||
//line app/vmalert/tpl/header.qtpl:71
|
||||
type NavItem struct {
|
||||
Name string
|
||||
Url string
|
||||
}
|
||||
|
||||
//line app/vmalert/tpl/header.qtpl:86
|
||||
//line app/vmalert/tpl/header.qtpl:77
|
||||
func streamprintNavItems(qw422016 *qt422016.Writer, r *http.Request, current string, items []NavItem) {
|
||||
//line app/vmalert/tpl/header.qtpl:86
|
||||
//line app/vmalert/tpl/header.qtpl:77
|
||||
qw422016.N().S(`
|
||||
`)
|
||||
//line app/vmalert/tpl/header.qtpl:88
|
||||
//line app/vmalert/tpl/header.qtpl:79
|
||||
prefix := "/vmalert/"
|
||||
if strings.HasPrefix(r.URL.Path, prefix) {
|
||||
prefix = ""
|
||||
}
|
||||
|
||||
//line app/vmalert/tpl/header.qtpl:92
|
||||
//line app/vmalert/tpl/header.qtpl:83
|
||||
qw422016.N().S(`
|
||||
<nav class="navbar navbar-expand-md navbar-dark fixed-top bg-dark">
|
||||
<div class="container-fluid">
|
||||
<div class="collapse navbar-collapse" id="navbarCollapse">
|
||||
<ul class="navbar-nav me-auto mb-2 mb-md-0">
|
||||
`)
|
||||
//line app/vmalert/tpl/header.qtpl:97
|
||||
//line app/vmalert/tpl/header.qtpl:88
|
||||
for _, item := range items {
|
||||
//line app/vmalert/tpl/header.qtpl:97
|
||||
//line app/vmalert/tpl/header.qtpl:88
|
||||
qw422016.N().S(`
|
||||
<li class="nav-item">
|
||||
`)
|
||||
//line app/vmalert/tpl/header.qtpl:100
|
||||
//line app/vmalert/tpl/header.qtpl:91
|
||||
u, _ := url.Parse(item.Url)
|
||||
|
||||
//line app/vmalert/tpl/header.qtpl:101
|
||||
//line app/vmalert/tpl/header.qtpl:92
|
||||
qw422016.N().S(`
|
||||
<a class="nav-link`)
|
||||
//line app/vmalert/tpl/header.qtpl:102
|
||||
//line app/vmalert/tpl/header.qtpl:93
|
||||
if current == item.Name {
|
||||
//line app/vmalert/tpl/header.qtpl:102
|
||||
//line app/vmalert/tpl/header.qtpl:93
|
||||
qw422016.N().S(` active`)
|
||||
//line app/vmalert/tpl/header.qtpl:102
|
||||
//line app/vmalert/tpl/header.qtpl:93
|
||||
}
|
||||
//line app/vmalert/tpl/header.qtpl:102
|
||||
//line app/vmalert/tpl/header.qtpl:93
|
||||
qw422016.N().S(`"
|
||||
href="`)
|
||||
//line app/vmalert/tpl/header.qtpl:103
|
||||
//line app/vmalert/tpl/header.qtpl:94
|
||||
if u.IsAbs() {
|
||||
//line app/vmalert/tpl/header.qtpl:103
|
||||
//line app/vmalert/tpl/header.qtpl:94
|
||||
qw422016.E().S(item.Url)
|
||||
//line app/vmalert/tpl/header.qtpl:103
|
||||
//line app/vmalert/tpl/header.qtpl:94
|
||||
} else {
|
||||
//line app/vmalert/tpl/header.qtpl:103
|
||||
//line app/vmalert/tpl/header.qtpl:94
|
||||
qw422016.E().S(path.Join(prefix, item.Url))
|
||||
//line app/vmalert/tpl/header.qtpl:103
|
||||
//line app/vmalert/tpl/header.qtpl:94
|
||||
}
|
||||
//line app/vmalert/tpl/header.qtpl:103
|
||||
//line app/vmalert/tpl/header.qtpl:94
|
||||
qw422016.N().S(`">
|
||||
`)
|
||||
//line app/vmalert/tpl/header.qtpl:104
|
||||
//line app/vmalert/tpl/header.qtpl:95
|
||||
qw422016.E().S(item.Name)
|
||||
//line app/vmalert/tpl/header.qtpl:104
|
||||
//line app/vmalert/tpl/header.qtpl:95
|
||||
qw422016.N().S(`
|
||||
</a>
|
||||
</li>
|
||||
`)
|
||||
//line app/vmalert/tpl/header.qtpl:107
|
||||
//line app/vmalert/tpl/header.qtpl:98
|
||||
}
|
||||
//line app/vmalert/tpl/header.qtpl:107
|
||||
//line app/vmalert/tpl/header.qtpl:98
|
||||
qw422016.N().S(`
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
`)
|
||||
//line app/vmalert/tpl/header.qtpl:111
|
||||
//line app/vmalert/tpl/header.qtpl:102
|
||||
}
|
||||
|
||||
//line app/vmalert/tpl/header.qtpl:111
|
||||
//line app/vmalert/tpl/header.qtpl:102
|
||||
func writeprintNavItems(qq422016 qtio422016.Writer, r *http.Request, current string, items []NavItem) {
|
||||
//line app/vmalert/tpl/header.qtpl:111
|
||||
//line app/vmalert/tpl/header.qtpl:102
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmalert/tpl/header.qtpl:111
|
||||
//line app/vmalert/tpl/header.qtpl:102
|
||||
streamprintNavItems(qw422016, r, current, items)
|
||||
//line app/vmalert/tpl/header.qtpl:111
|
||||
//line app/vmalert/tpl/header.qtpl:102
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmalert/tpl/header.qtpl:111
|
||||
//line app/vmalert/tpl/header.qtpl:102
|
||||
}
|
||||
|
||||
//line app/vmalert/tpl/header.qtpl:111
|
||||
//line app/vmalert/tpl/header.qtpl:102
|
||||
func printNavItems(r *http.Request, current string, items []NavItem) string {
|
||||
//line app/vmalert/tpl/header.qtpl:111
|
||||
//line app/vmalert/tpl/header.qtpl:102
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmalert/tpl/header.qtpl:111
|
||||
//line app/vmalert/tpl/header.qtpl:102
|
||||
writeprintNavItems(qb422016, r, current, items)
|
||||
//line app/vmalert/tpl/header.qtpl:111
|
||||
//line app/vmalert/tpl/header.qtpl:102
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmalert/tpl/header.qtpl:111
|
||||
//line app/vmalert/tpl/header.qtpl:102
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vmalert/tpl/header.qtpl:111
|
||||
//line app/vmalert/tpl/header.qtpl:102
|
||||
return qs422016
|
||||
//line app/vmalert/tpl/header.qtpl:111
|
||||
//line app/vmalert/tpl/header.qtpl:102
|
||||
}
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
@@ -50,62 +47,3 @@ func newTimeSeriesPB(values []float64, timestamps []int64, labels []prompbmarsha
|
||||
ts.Labels = labels
|
||||
return ts
|
||||
}
|
||||
|
||||
type curlWriter struct {
|
||||
b strings.Builder
|
||||
}
|
||||
|
||||
func (cw *curlWriter) string() string {
|
||||
res := "curl " + cw.b.String()
|
||||
cw.b.Reset()
|
||||
return strings.TrimSpace(res)
|
||||
}
|
||||
|
||||
func (cw *curlWriter) addWithEsc(str string) {
|
||||
escStr := `'` + strings.Replace(str, `'`, `'\''`, -1) + `'`
|
||||
cw.add(escStr)
|
||||
}
|
||||
|
||||
func (cw *curlWriter) add(str string) {
|
||||
cw.b.WriteString(str)
|
||||
cw.b.WriteString(" ")
|
||||
}
|
||||
|
||||
func requestToCurl(req *http.Request) string {
|
||||
if req == nil || req.URL == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
cw := &curlWriter{}
|
||||
|
||||
schema := req.URL.Scheme
|
||||
requestURL := req.URL.String()
|
||||
if schema == "" {
|
||||
schema = "http"
|
||||
if req.TLS != nil {
|
||||
schema = "https"
|
||||
}
|
||||
requestURL = schema + "://" + req.Host + requestURL
|
||||
}
|
||||
|
||||
if schema == "https" {
|
||||
cw.add("-k")
|
||||
}
|
||||
|
||||
cw.add("-X")
|
||||
cw.add(req.Method)
|
||||
|
||||
var keys []string
|
||||
for k := range req.Header {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
|
||||
for _, k := range keys {
|
||||
cw.add("-H")
|
||||
cw.addWithEsc(fmt.Sprintf("%s: %s", k, strings.Join(req.Header[k], " ")))
|
||||
}
|
||||
|
||||
cw.addWithEsc(requestURL)
|
||||
return cw.string()
|
||||
}
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestRequestToCurl(t *testing.T) {
|
||||
f := func(req *http.Request, exp string) {
|
||||
got := requestToCurl(req)
|
||||
if got != exp {
|
||||
t.Fatalf("expected to have %q; got %q instead", exp, got)
|
||||
}
|
||||
}
|
||||
|
||||
req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
|
||||
f(req, "curl -X POST 'http://foo.com'")
|
||||
|
||||
req, _ = http.NewRequest(http.MethodGet, "https://foo.com", nil)
|
||||
f(req, "curl -k -X GET 'https://foo.com'")
|
||||
|
||||
req, _ = http.NewRequest(http.MethodPost, "foo.com", nil)
|
||||
req.Header.Set("foo", "bar")
|
||||
req.Header.Set("baz", "qux")
|
||||
f(req, "curl -X POST -H 'Baz: qux' -H 'Foo: bar' 'http://foo.com'")
|
||||
|
||||
req, _ = http.NewRequest(http.MethodPost, "foo.com", nil)
|
||||
params := req.URL.Query()
|
||||
params.Add("query", "up")
|
||||
params.Add("step", "10")
|
||||
req.URL.RawQuery = params.Encode()
|
||||
f(req, "curl -X POST 'http://foo.com?query=up&step=10'")
|
||||
|
||||
req, _ = http.NewRequest(http.MethodPost, "http://foo.com", nil)
|
||||
params = req.URL.Query()
|
||||
params.Add("query", "up")
|
||||
params.Add("step", "10")
|
||||
req.URL.RawQuery = params.Encode()
|
||||
f(req, "curl -X POST 'http://foo.com?query=up&step=10'")
|
||||
|
||||
req, _ = http.NewRequest(http.MethodPost, "https://foo.com", nil)
|
||||
params = req.URL.Query()
|
||||
params.Add("query", "up")
|
||||
params.Add("step", "10")
|
||||
req.URL.RawQuery = params.Encode()
|
||||
f(req, "curl -k -X POST 'https://foo.com?query=up&step=10'")
|
||||
}
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 109 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 41 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 41 KiB |
@@ -8,6 +8,7 @@ import (
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/tpl"
|
||||
@@ -17,14 +18,20 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
once = sync.Once{}
|
||||
apiLinks [][2]string
|
||||
navItems []tpl.NavItem
|
||||
)
|
||||
|
||||
func initLinks() {
|
||||
apiLinks = [][2]string{
|
||||
// api links are relative since they can be used by external clients,
|
||||
// such as Grafana, and proxied via vmselect.
|
||||
{"api/v1/rules", "list all loaded groups and rules"},
|
||||
{"api/v1/alerts", "list all active alerts"},
|
||||
{fmt.Sprintf("api/v1/alert?%s=<int>&%s=<int>", paramGroupID, paramAlertID), "get alert status by group and alert ID"},
|
||||
}
|
||||
systemLinks = [][2]string{
|
||||
|
||||
// system links
|
||||
{"/flags", "command-line flags"},
|
||||
{"/metrics", "list of application metrics"},
|
||||
{"/-/reload", "reload configuration"},
|
||||
@@ -36,7 +43,7 @@ var (
|
||||
{Name: "Notifiers", Url: "notifiers"},
|
||||
{Name: "Docs", Url: "https://docs.victoriametrics.com/vmalert.html"},
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
type requestHandler struct {
|
||||
m *manager
|
||||
@@ -50,6 +57,10 @@ var (
|
||||
)
|
||||
|
||||
func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
once.Do(func() {
|
||||
initLinks()
|
||||
})
|
||||
|
||||
if strings.HasPrefix(r.URL.Path, "/vmalert/static") {
|
||||
staticServer.ServeHTTP(w, r)
|
||||
return true
|
||||
@@ -74,14 +85,6 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
WriteAlert(w, r, alert)
|
||||
return true
|
||||
case "/vmalert/rule":
|
||||
rule, err := rh.getRule(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
WriteRuleDetails(w, r, rule)
|
||||
return true
|
||||
case "/vmalert/groups":
|
||||
WriteListGroups(w, r, rh.groups())
|
||||
return true
|
||||
@@ -157,7 +160,7 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
if strings.HasPrefix(r.URL.Path, "/api/v1/") {
|
||||
redirectURL = alert.APILink()
|
||||
}
|
||||
httpserver.Redirect(w, "/"+redirectURL)
|
||||
httpserver.RedirectPermanent(w, "/"+redirectURL)
|
||||
return true
|
||||
}
|
||||
}
|
||||
@@ -165,25 +168,8 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
const (
|
||||
paramGroupID = "group_id"
|
||||
paramAlertID = "alert_id"
|
||||
paramRuleID = "rule_id"
|
||||
)
|
||||
|
||||
func (rh *requestHandler) getRule(r *http.Request) (APIRule, error) {
|
||||
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 0)
|
||||
if err != nil {
|
||||
return APIRule{}, fmt.Errorf("failed to read %q param: %s", paramGroupID, err)
|
||||
}
|
||||
ruleID, err := strconv.ParseUint(r.FormValue(paramRuleID), 10, 0)
|
||||
if err != nil {
|
||||
return APIRule{}, fmt.Errorf("failed to read %q param: %s", paramRuleID, err)
|
||||
}
|
||||
rule, err := rh.m.RuleAPI(groupID, ruleID)
|
||||
if err != nil {
|
||||
return APIRule{}, errResponse(err, http.StatusNotFound)
|
||||
}
|
||||
return rule, nil
|
||||
}
|
||||
|
||||
func (rh *requestHandler) getAlert(r *http.Request) (*APIAlert, error) {
|
||||
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 0)
|
||||
if err != nil {
|
||||
|
||||
@@ -16,22 +16,16 @@
|
||||
<p>
|
||||
API:<br>
|
||||
{% for _, p := range apiLinks %}
|
||||
{%code p, doc := p[0], p[1] %}
|
||||
<a href="{%s p %}">{%s p %}</a> - {%s doc %}<br/>
|
||||
{%code
|
||||
p, doc := p[0], p[1]
|
||||
%}
|
||||
<a href="{%s p %}">{%s p %}</a> - {%s doc %}<br/>
|
||||
{% endfor %}
|
||||
{% if r.Header.Get("X-Forwarded-For") == "" %}
|
||||
System:<br>
|
||||
{% for _, p := range systemLinks %}
|
||||
{%code p, doc := p[0], p[1] %}
|
||||
<a href="{%s p %}">{%s p %}</a> - {%s doc %}<br/>
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
</p>
|
||||
{%= tpl.Footer(r) %}
|
||||
{% endfunc %}
|
||||
|
||||
{% func ListGroups(r *http.Request, groups []APIGroup) %}
|
||||
{%code prefix := utils.Prefix(r.URL.Path) %}
|
||||
{%= tpl.Header(r, navItems, "Groups") %}
|
||||
{% if len(groups) > 0 %}
|
||||
{%code
|
||||
@@ -91,7 +85,6 @@
|
||||
{% else %}
|
||||
<b>record:</b> {%s r.Name %}
|
||||
{% endif %}
|
||||
| <span><a target="_blank" href="{%s prefix+r.WebLink() %}">Details</a></span>
|
||||
</div>
|
||||
<div class="col-12">
|
||||
<code><pre>{%s r.Query %}</pre></code>
|
||||
@@ -123,7 +116,7 @@
|
||||
|
||||
{% else %}
|
||||
<div>
|
||||
<p>No groups...</p>
|
||||
<p>No items...</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
@@ -211,7 +204,7 @@
|
||||
|
||||
{% else %}
|
||||
<div>
|
||||
<p>No active alerts...</p>
|
||||
<p>No items...</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
@@ -253,7 +246,7 @@
|
||||
{% for _, n := range ns %}
|
||||
<tr>
|
||||
<td>
|
||||
{% for _, l := range n.Labels.GetLabels() %}
|
||||
{% for _, l := range n.Labels %}
|
||||
<span class="ms-1 badge bg-primary">{%s l.Name %}={%s l.Value %}</span>
|
||||
{% endfor %}
|
||||
</td>
|
||||
@@ -267,7 +260,7 @@
|
||||
|
||||
{% else %}
|
||||
<div>
|
||||
<p>No targets...</p>
|
||||
<p>No items...</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
@@ -291,7 +284,7 @@
|
||||
}
|
||||
sort.Strings(annotationKeys)
|
||||
%}
|
||||
<div class="display-6 pb-3 mb-3">Alert: {%s alert.Name %}<span class="ms-2 badge {% if alert.State=="firing" %}bg-danger{% else %} bg-warning text-dark{% endif %}">{%s alert.State %}</span></div>
|
||||
<div class="display-6 pb-3 mb-3">{%s alert.Name %}<span class="ms-2 badge {% if alert.State=="firing" %}bg-danger{% else %} bg-warning text-dark{% endif %}">{%s alert.State %}</span></div>
|
||||
<div class="container border-bottom p-2">
|
||||
<div class="row">
|
||||
<div class="col-2">
|
||||
@@ -361,125 +354,6 @@
|
||||
|
||||
{% endfunc %}
|
||||
|
||||
|
||||
{% func RuleDetails(r *http.Request, rule APIRule) %}
|
||||
{%code prefix := utils.Prefix(r.URL.Path) %}
|
||||
{%= tpl.Header(r, navItems, "") %}
|
||||
{%code
|
||||
var labelKeys []string
|
||||
for k := range rule.Labels {
|
||||
labelKeys = append(labelKeys, k)
|
||||
}
|
||||
sort.Strings(labelKeys)
|
||||
|
||||
var annotationKeys []string
|
||||
for k := range rule.Annotations {
|
||||
annotationKeys = append(annotationKeys, k)
|
||||
}
|
||||
sort.Strings(annotationKeys)
|
||||
%}
|
||||
<div class="display-6 pb-3 mb-3">Rule: {%s rule.Name %}<span class="ms-2 badge {% if rule.Health!="ok" %}bg-danger{% else %} bg-warning text-dark{% endif %}">{%s rule.Health %}</span></div>
|
||||
<div class="container border-bottom p-2">
|
||||
<div class="row">
|
||||
<div class="col-2">
|
||||
Expr
|
||||
</div>
|
||||
<div class="col">
|
||||
<code><pre>{%s rule.Query %}</pre></code>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% if rule.Type == "alerting" %}
|
||||
<div class="container border-bottom p-2">
|
||||
<div class="row">
|
||||
<div class="col-2">
|
||||
For
|
||||
</div>
|
||||
<div class="col">
|
||||
{%v rule.Duration %} seconds
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
<div class="container border-bottom p-2">
|
||||
<div class="row">
|
||||
<div class="col-2">
|
||||
Labels
|
||||
</div>
|
||||
<div class="col">
|
||||
{% for _, k := range labelKeys %}
|
||||
<span class="m-1 badge bg-primary">{%s k %}={%s rule.Labels[k] %}</span>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% if rule.Type == "alerting" %}
|
||||
<div class="container border-bottom p-2">
|
||||
<div class="row">
|
||||
<div class="col-2">
|
||||
Annotations
|
||||
</div>
|
||||
<div class="col">
|
||||
{% for _, k := range annotationKeys %}
|
||||
<b>{%s k %}:</b><br>
|
||||
<p>{%s rule.Annotations[k] %}</p>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
<div class="container border-bottom p-2">
|
||||
<div class="row">
|
||||
<div class="col-2">
|
||||
Group
|
||||
</div>
|
||||
<div class="col">
|
||||
<a target="_blank" href="{%s prefix %}groups#group-{%s rule.GroupID %}">{%s rule.GroupID %}</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<br>
|
||||
<div class="display-6 pb-3">Last {%d len(rule.Updates) %}/{%d rule.MaxUpdates %} updates</span>:</div>
|
||||
<table class="table table-striped table-hover table-sm">
|
||||
<thead>
|
||||
<tr>
|
||||
<th scope="col" title="The time when event was created">Updated at</th>
|
||||
<th scope="col" style="width: 10%" class="text-center" title="How many samples were returned">Samples</th>
|
||||
<th scope="col" style="width: 10%" class="text-center" title="How many seconds request took">Duration</th>
|
||||
<th scope="col" class="text-center" title="Time used for rule execution">Executed at</th>
|
||||
<th scope="col" class="text-center" title="cURL command with request example">cURL</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
|
||||
{% for _, u := range rule.Updates %}
|
||||
<tr{% if u.err != nil %} class="alert-danger"{% endif %}>
|
||||
<td>
|
||||
<span class="badge bg-primary rounded-pill me-3" title="Updated at">{%s u.time.Format(time.RFC3339) %}</span>
|
||||
</td>
|
||||
<td class="text-center" wi>{%d u.samples %}</td>
|
||||
<td class="text-center">{%f.3 u.duration.Seconds() %}s</td>
|
||||
<td class="text-center">{%s u.at.Format(time.RFC3339) %}</td>
|
||||
<td>
|
||||
<textarea class="curl-area" rows="1" onclick="this.focus();this.select()">{%s u.curl %}</textarea>
|
||||
</td>
|
||||
</tr>
|
||||
</li>
|
||||
{% if u.err != nil %}
|
||||
<tr{% if u.err != nil %} class="alert-danger"{% endif %}>
|
||||
<td colspan="5">
|
||||
<span class="alert-danger">{%v u.err %}</span>
|
||||
</td>
|
||||
</tr>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
|
||||
{%= tpl.Footer(r) %}
|
||||
{% endfunc %}
|
||||
|
||||
|
||||
|
||||
{% func badgeState(state string) %}
|
||||
{%code
|
||||
badgeClass := "bg-warning text-dark"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -17,7 +17,6 @@ func TestHandler(t *testing.T) {
|
||||
alerts: map[uint64]*notifier.Alert{
|
||||
0: {State: notifier.StateFiring},
|
||||
},
|
||||
state: newRuleState(10),
|
||||
}
|
||||
g := &Group{
|
||||
Name: "group",
|
||||
@@ -53,22 +52,6 @@ func TestHandler(t *testing.T) {
|
||||
t.Run("/", func(t *testing.T) {
|
||||
getResp(ts.URL, nil, 200)
|
||||
getResp(ts.URL+"/vmalert", nil, 200)
|
||||
getResp(ts.URL+"/vmalert/alerts", nil, 200)
|
||||
getResp(ts.URL+"/vmalert/groups", nil, 200)
|
||||
getResp(ts.URL+"/vmalert/notifiers", nil, 200)
|
||||
getResp(ts.URL+"/rules", nil, 200)
|
||||
})
|
||||
|
||||
t.Run("/vmalert/rule", func(t *testing.T) {
|
||||
a := ar.ToAPI()
|
||||
getResp(ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
|
||||
})
|
||||
t.Run("/vmalert/rule?badParam", func(t *testing.T) {
|
||||
params := fmt.Sprintf("?%s=0&%s=1", paramGroupID, paramRuleID)
|
||||
getResp(ts.URL+"/vmalert/rule"+params, nil, 404)
|
||||
|
||||
params = fmt.Sprintf("?%s=1&%s=0", paramGroupID, paramRuleID)
|
||||
getResp(ts.URL+"/vmalert/rule"+params, nil, 404)
|
||||
})
|
||||
|
||||
t.Run("/api/v1/alerts", func(t *testing.T) {
|
||||
|
||||
@@ -86,7 +86,7 @@ type GroupAlerts struct {
|
||||
// see https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
||||
type APIRule struct {
|
||||
// State must be one of these under following scenarios
|
||||
// "pending": at least 1 alert in the rule in pending state and no other alert in firing ruleState.
|
||||
// "pending": at least 1 alert in the rule in pending state and no other alert in firing state.
|
||||
// "firing": at least 1 alert in the rule in firing state.
|
||||
// "inactive": no alert in the rule in firing or pending state.
|
||||
State string `json:"state"`
|
||||
@@ -116,19 +116,8 @@ type APIRule struct {
|
||||
// Type of the rule: recording or alerting
|
||||
DatasourceType string `json:"datasourceType"`
|
||||
LastSamples int `json:"lastSamples"`
|
||||
// ID is a unique Alert's ID within a group
|
||||
// ID is an unique Alert's ID within a group
|
||||
ID string `json:"id"`
|
||||
// GroupID is an unique Group's ID
|
||||
GroupID string `json:"group_id"`
|
||||
|
||||
// MaxUpdates is the max number of recorded ruleStateEntry objects
|
||||
MaxUpdates int `json:"max_updates_entries"`
|
||||
// Updates contains the ordered list of recorded ruleStateEntry objects
|
||||
Updates []ruleStateEntry `json:"updates"`
|
||||
}
|
||||
|
||||
// WebLink returns a link to the alert which can be used in UI.
|
||||
func (ar APIRule) WebLink() string {
|
||||
return fmt.Sprintf("rule?%s=%s&%s=%s",
|
||||
paramGroupID, ar.GroupID, paramRuleID, ar.ID)
|
||||
}
|
||||
|
||||
@@ -167,7 +167,7 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
|
||||
|
||||
### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.18.
|
||||
2. Run `make vmauth` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmauth` binary and puts it into the `bin` folder.
|
||||
|
||||
@@ -239,7 +239,7 @@ See the docs at https://docs.victoriametrics.com/vmauth.html .
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-eula
|
||||
By specifying this flag, you confirm that you have an enterprise license and accept the EULA https://victoriametrics.com/assets/VM_EULA.pdf . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
By specifying this flag, you confirm that you have an enterprise license and accept the EULA https://victoriametrics.com/assets/VM_EULA.pdf
|
||||
-flagsAuthKey string
|
||||
Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-fs.disableMmap
|
||||
@@ -270,8 +270,6 @@ See the docs at https://docs.victoriametrics.com/vmauth.html .
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
@@ -284,7 +282,7 @@ See the docs at https://docs.victoriametrics.com/vmauth.html .
|
||||
The maximum number of idle connections vmauth can open per each backend host (default 100)
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache resulting in higher disk IO usage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
@@ -310,8 +308,6 @@ See the docs at https://docs.victoriametrics.com/vmauth.html .
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-tlsKeyFile string
|
||||
Path to file with TLS key if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated
|
||||
-tlsMinVersion string
|
||||
Optional minimum TLS version to use for incoming requests over HTTPS if -tls is set. Supported values: TLS10, TLS11, TLS12, TLS13
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
```
|
||||
|
||||
@@ -250,11 +250,7 @@ func readAuthConfig(path string) (map[string]*UserInfo, error) {
|
||||
}
|
||||
|
||||
func parseAuthConfig(data []byte) (map[string]*UserInfo, error) {
|
||||
var err error
|
||||
data, err = envtemplate.ReplaceBytes(data)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot expand environment vars: %w", err)
|
||||
}
|
||||
data = envtemplate.Replace(data)
|
||||
var ac AuthConfig
|
||||
if err := yaml.UnmarshalStrict(data, &ac); err != nil {
|
||||
return nil, fmt.Errorf("cannot unmarshal AuthConfig data: %w", err)
|
||||
|
||||
@@ -60,7 +60,9 @@ func main() {
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
switch r.URL.Path {
|
||||
case "/-/reload":
|
||||
if !httpserver.CheckAuthFlag(w, r, *reloadAuthKey, "reloadAuthKey") {
|
||||
authKey := r.FormValue("authKey")
|
||||
if authKey != *reloadAuthKey {
|
||||
httpserver.Errorf(w, r, "invalid authKey %q. It must match the value from -reloadAuthKey command line flag", authKey)
|
||||
return true
|
||||
}
|
||||
configReloadRequests.Inc()
|
||||
|
||||
@@ -39,19 +39,10 @@ func createTargetURL(ui *UserInfo, uOrig *url.URL) (*url.URL, []Header, error) {
|
||||
u := *uOrig
|
||||
// Prevent from attacks with using `..` in r.URL.Path
|
||||
u.Path = path.Clean(u.Path)
|
||||
if !strings.HasSuffix(u.Path, "/") && strings.HasSuffix(uOrig.Path, "/") {
|
||||
// The path.Clean() removes traling slash.
|
||||
// Return it back if needed.
|
||||
// This should fix https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1752
|
||||
u.Path += "/"
|
||||
}
|
||||
if !strings.HasPrefix(u.Path, "/") {
|
||||
u.Path = "/" + u.Path
|
||||
}
|
||||
if u.Path == "/" {
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1554
|
||||
u.Path = ""
|
||||
}
|
||||
u.Path = strings.TrimSuffix(u.Path, "/")
|
||||
for _, e := range ui.URLMap {
|
||||
for _, sp := range e.SrcPaths {
|
||||
if sp.match(u.Path) {
|
||||
|
||||
@@ -2,6 +2,13 @@
|
||||
|
||||
`vmbackup` creates VictoriaMetrics data backups from [instant snapshots](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots).
|
||||
|
||||
Supported storage systems for backups:
|
||||
|
||||
* [GCS](https://cloud.google.com/storage/). Example: `gs://<bucket>/<path/to/backup>`
|
||||
* [S3](https://aws.amazon.com/s3/). Example: `s3://<bucket>/<path/to/backup>`
|
||||
* Any S3-compatible storage such as [MinIO](https://github.com/minio/minio), [Ceph](https://docs.ceph.com/en/pacific/radosgw/s3/) or [Swift](https://platform.swiftstack.com/docs/admin/middleware/s3_middleware.html). See [these docs](#advanced-usage) for details.
|
||||
* Local filesystem. Example: `fs://</absolute/path/to/backup>`. Note that `vmbackup` prevents from storing the backup into the directory pointed by `-storageDataPath` command-line flag, since this directory should be managed solely by VictoriaMetrics or `vmstorage`.
|
||||
|
||||
`vmbackup` supports incremental and full backups. Incremental backups are created automatically if the destination path already contains data from the previous backup.
|
||||
Full backups can be sped up with `-origin` pointing to an already existing backup on the same remote storage. In this case `vmbackup` makes server-side copy for the shared
|
||||
data between the existing backup and new backup. It saves time and costs on data transfer.
|
||||
@@ -15,16 +22,6 @@ See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-
|
||||
See also [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager.html) tool built on top of `vmbackup`. This tool simplifies
|
||||
creation of hourly, daily, weekly and monthly backups.
|
||||
|
||||
## Supported storage types
|
||||
|
||||
`vmbackup` supports the following `-dst` storage types:
|
||||
|
||||
* [GCS](https://cloud.google.com/storage/). Example: `gs://<bucket>/<path/to/backup>`
|
||||
* [S3](https://aws.amazon.com/s3/). Example: `s3://<bucket>/<path/to/backup>`
|
||||
* [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs/). Example: `azblob://<container>/<path/to/backup>`
|
||||
* Any S3-compatible storage such as [MinIO](https://github.com/minio/minio), [Ceph](https://docs.ceph.com/en/pacific/radosgw/s3/) or [Swift](https://platform.swiftstack.com/docs/admin/middleware/s3_middleware.html). See [these docs](#advanced-usage) for details.
|
||||
* Local filesystem. Example: `fs://</absolute/path/to/backup>`. Note that `vmbackup` prevents from storing the backup into the directory pointed by `-storageDataPath` command-line flag, since this directory should be managed solely by VictoriaMetrics or `vmstorage`.
|
||||
|
||||
## Use cases
|
||||
|
||||
### Regular backups
|
||||
@@ -32,7 +29,7 @@ creation of hourly, daily, weekly and monthly backups.
|
||||
Regular backup can be performed with the following command:
|
||||
|
||||
```console
|
||||
./vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=gs://<bucket>/<path/to/new/backup>
|
||||
vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=gs://<bucket>/<path/to/new/backup>
|
||||
```
|
||||
|
||||
* `</path/to/victoria-metrics-data>` - path to VictoriaMetrics data pointed by `-storageDataPath` command-line flag in single-node VictoriaMetrics or in cluster `vmstorage`.
|
||||
@@ -77,7 +74,7 @@ The command will upload only changed data to `gs://<bucket>/latest`.
|
||||
* Run the following command once a day:
|
||||
|
||||
```console
|
||||
./vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=gs://<bucket>/<YYYYMMDD> -origin=gs://<bucket>/latest
|
||||
vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=gs://<bucket>/<YYYYMMDD> -origin=gs://<bucket>/latest
|
||||
```
|
||||
|
||||
Where `<daily-snapshot>` is the snapshot for the last day `<YYYYMMDD>`.
|
||||
@@ -154,11 +151,6 @@ See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-
|
||||
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/service-account-email"
|
||||
}
|
||||
```
|
||||
* Obtaining credentials from env variables.
|
||||
- For AWS S3 compatible storages set env variable `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`.
|
||||
Also you can set env variable `AWS_SHARED_CREDENTIALS_FILE` with path to credentials file.
|
||||
- For GCE cloud storage set env variable `GOOGLE_APPLICATION_CREDENTIALS` with path to credentials file.
|
||||
- For Azure storage either set env variables `AZURE_STORAGE_ACCOUNT_NAME` and `AZURE_STORAGE_ACCOUNT_KEY`, or `AZURE_STORAGE_ACCOUNT_CONNECTION_STRING`.
|
||||
|
||||
* Usage with s3 custom url endpoint. It is possible to use `vmbackup` with s3 compatible storages like minio, cloudian, etc.
|
||||
You have to add a custom url endpoint via flag:
|
||||
@@ -187,7 +179,7 @@ See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-
|
||||
-customS3Endpoint string
|
||||
Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set
|
||||
-dst string
|
||||
Where to put the backup on the remote storage. Example: gs://bucket/path/to/backup, s3://bucket/path/to/backup, azblob://container/path/to/backup or fs:///path/to/local/backup/dir
|
||||
Where to put the backup on the remote storage. Example: gs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
|
||||
-dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default only IPv4 TCP and UDP is used
|
||||
@@ -196,7 +188,7 @@ See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-eula
|
||||
By specifying this flag, you confirm that you have an enterprise license and accept the EULA https://victoriametrics.com/assets/VM_EULA.pdf . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
|
||||
By specifying this flag, you confirm that you have an enterprise license and accept the EULA https://victoriametrics.com/assets/VM_EULA.pdf
|
||||
-flagsAuthKey string
|
||||
Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-fs.disableMmap
|
||||
@@ -225,8 +217,6 @@ See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
@@ -237,10 +227,10 @@ See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
|
||||
-maxBytesPerSecond size
|
||||
The maximum upload speed. There is no limit if it is set to 0
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache resulting in higher disk IO usage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from OS page cache which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
@@ -276,8 +266,6 @@ See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-tlsKeyFile string
|
||||
Path to file with TLS key if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated
|
||||
-tlsMinVersion string
|
||||
Optional minimum TLS version to use for incoming requests over HTTPS if -tls is set. Supported values: TLS10, TLS11, TLS12, TLS13
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
```
|
||||
@@ -288,7 +276,7 @@ It is recommended using [binary releases](https://github.com/VictoriaMetrics/Vic
|
||||
|
||||
### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.18.
|
||||
2. Run `make vmbackup` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds `vmbackup` binary and puts it into the `bin` folder.
|
||||
|
||||
|
||||
@@ -3,7 +3,6 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
@@ -31,7 +30,7 @@ var (
|
||||
snapshotDeleteURL = flag.String("snapshot.deleteURL", "", "VictoriaMetrics delete snapshot url. Optional. Will be generated from -snapshot.createURL if not provided. "+
|
||||
"All created snapshots will be automatically deleted. Example: http://victoriametrics:8428/snapshot/delete")
|
||||
dst = flag.String("dst", "", "Where to put the backup on the remote storage. "+
|
||||
"Example: gs://bucket/path/to/backup, s3://bucket/path/to/backup, azblob://container/path/to/backup or fs:///path/to/local/backup/dir\n"+
|
||||
"Example: gs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir\n"+
|
||||
"-dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded")
|
||||
origin = flag.String("origin", "", "Optional origin directory on the remote storage with old backup for server-side copying when performing full backup. This speeds up full backups")
|
||||
concurrency = flag.Int("concurrency", 10, "The number of concurrent workers. Higher concurrency may reduce backup duration")
|
||||
@@ -42,36 +41,25 @@ func main() {
|
||||
// Write flags and help message to stdout, since it is easier to grep or pipe.
|
||||
flag.CommandLine.SetOutput(os.Stdout)
|
||||
flag.Usage = usage
|
||||
flagutil.RegisterSecretFlag("snapshot.createURL")
|
||||
flagutil.RegisterSecretFlag("snapshot.deleteURL")
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
pushmetrics.Init()
|
||||
|
||||
if len(*snapshotCreateURL) > 0 {
|
||||
// create net/url object
|
||||
createUrl, err := url.Parse(*snapshotCreateURL)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot parse snapshotCreateURL: %s", err)
|
||||
}
|
||||
if len(*snapshotName) > 0 {
|
||||
logger.Fatalf("-snapshotName shouldn't be set if -snapshot.createURL is set, since snapshots are created automatically in this case")
|
||||
}
|
||||
logger.Infof("Snapshot create url %s", createUrl.Redacted())
|
||||
logger.Infof("Snapshot create url %s", *snapshotCreateURL)
|
||||
if len(*snapshotDeleteURL) <= 0 {
|
||||
err := flag.Set("snapshot.deleteURL", strings.Replace(*snapshotCreateURL, "/create", "/delete", 1))
|
||||
if err != nil {
|
||||
logger.Fatalf("Failed to set snapshot.deleteURL flag: %v", err)
|
||||
}
|
||||
}
|
||||
deleteUrl, err := url.Parse(*snapshotCreateURL)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot parse snapshotDeleteURL: %s", err)
|
||||
}
|
||||
logger.Infof("Snapshot delete url %s", deleteUrl.Redacted())
|
||||
logger.Infof("Snapshot delete url %s", *snapshotDeleteURL)
|
||||
|
||||
name, err := snapshot.Create(createUrl.String())
|
||||
name, err := snapshot.Create(*snapshotCreateURL)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot create snapshot: %s", err)
|
||||
}
|
||||
@@ -81,7 +69,7 @@ func main() {
|
||||
}
|
||||
|
||||
defer func() {
|
||||
err := snapshot.Delete(deleteUrl.String(), name)
|
||||
err := snapshot.Delete(*snapshotDeleteURL, name)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot delete snapshot: %s", err)
|
||||
}
|
||||
@@ -130,7 +118,7 @@ func main() {
|
||||
|
||||
func usage() {
|
||||
const s = `
|
||||
vmbackup performs backups for VictoriaMetrics data from instant snapshots to gcs, s3, azblob
|
||||
vmbackup performs backups for VictoriaMetrics data from instant snapshots to gcs, s3
|
||||
or local filesystem. Backed up data can be restored with vmrestore.
|
||||
|
||||
See the docs at https://docs.victoriametrics.com/vmbackup.html .
|
||||
@@ -157,7 +145,7 @@ func newSrcFS() (*fslocal.FS, error) {
|
||||
|
||||
fs := &fslocal.FS{
|
||||
Dir: snapshotPath,
|
||||
MaxBytesPerSecond: maxBytesPerSecond.IntN(),
|
||||
MaxBytesPerSecond: maxBytesPerSecond.N,
|
||||
}
|
||||
if err := fs.Init(); err != nil {
|
||||
return nil, fmt.Errorf("cannot initialize fs: %w", err)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user