Compare commits

..

1 Commits

Author SHA1 Message Date
Max Kotliar
e9261be945 lib/promutil: Weak pointer based labels compressor 2025-08-20 20:02:11 +03:00
1991 changed files with 94033 additions and 141140 deletions

View File

@@ -1,48 +0,0 @@
#!/usr/bin/env sh
set -e
CHANGELOG_FILE="docs/victoriametrics/changelog/CHANGELOG.md"
GITHUB_BASE_REF=${GITHUB_BASE_REF:-"master"}
GIT_REMOTE=${GIT_REMOTE:-"origin"}
git diff "${GIT_REMOTE}/${GITHUB_BASE_REF}"...HEAD -- $CHANGELOG_FILE > diff.txt
if ! grep -q "^+" diff.txt; then
echo "No additions in CHANGELOG.md"
exit 0
fi
ADDED_LINES=$(grep "^+\S" diff.txt | sed 's/^+//')
START_TIP=$(grep -n "^## tip" "$CHANGELOG_FILE" | head -1 | cut -d: -f1)
if [ -z "$START_TIP" ]; then
echo "ERROR: ${CHANGELOG_FILE} does not contain a ## tip section"
exit 1
fi
END_TIP=$(awk "NR>$START_TIP && /^## / {print NR; exit}" "${CHANGELOG_FILE}")
if [ -z "$END_TIP" ]; then
END_TIP=$(wc -l < "$CHANGELOG_FILE")
fi
BAD=0
while IFS= read -r line; do
# Grep exact line inside the file and get line numbers
MATCHES=$(grep -n -F "$line" "$CHANGELOG_FILE" | cut -d: -f1)
for m in $MATCHES; do
if [ "$m" -lt "$START_TIP" ] || [ "$m" -gt "$END_TIP" ]; then
echo "'$line' on line ${m} is outside ## tip section (lines ${START_TIP}-${END_TIP})"
BAD=1
fi
done
done << EOF
$ADDED_LINES
EOF
if [ "$BAD" -ne 0 ]; then
echo "CHANGELOG modifications must be placed inside the ## tip section."
exit 1
fi
echo "CHANGELOG modifications are valid."

View File

@@ -47,8 +47,6 @@ jobs:
arch: arm
- os: linux
arch: ppc64le
- os: linux
arch: s390x
- os: darwin
arch: amd64
- os: darwin
@@ -61,11 +59,11 @@ jobs:
arch: amd64
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Setup Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@v5
with:
cache-dependency-path: |
go.sum

View File

@@ -1,19 +0,0 @@
name: 'changelog-linter'
on:
pull_request:
paths:
- "docs/victoriametrics/changelog/CHANGELOG.md"
jobs:
tip-lint:
runs-on: 'ubuntu-latest'
steps:
- uses: 'actions/checkout@v6'
with:
# needed for proper diff
fetch-depth: 0
- name: 'Validate that changelog changes are under ## tip'
run: |
GITHUB_BASE_REF=${{ github.base_ref }} ./.github/scripts/lint-changelog-tip.sh

View File

@@ -1,37 +0,0 @@
name: check-commit-signed
on:
pull_request:
jobs:
check-commit-signed:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v6
with:
fetch-depth: 0 # we need full history for commit verification
- name: Check commit signatures
run: |
if [ "${{ github.event_name }}" != "pull_request" ]; then
echo "Not a PR event, skipping signature check"
exit 0
fi
RANGE="${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }}"
echo "Checking commits in PR range: $RANGE"
if [ -z "$(git rev-list $RANGE)" ]; then
echo "No new commits in this PR, skipping signature check"
exit 0
fi
unsigned=$(git log --pretty="%H %G?" $RANGE | grep -vE " (G|E)$" || true)
if [ -n "$unsigned" ]; then
echo "Found unsigned commits:"
echo "$unsigned"
exit 1
fi
echo "All commits in PR are signed (G or E)"

View File

@@ -19,7 +19,7 @@ jobs:
- name: Setup Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@v5
with:
go-version: stable
cache: false

View File

@@ -29,11 +29,11 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Set up Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@v5
with:
cache: false
go-version: stable
@@ -49,14 +49,14 @@ jobs:
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-
- name: Initialize CodeQL
uses: github/codeql-action/init@v4
uses: github/codeql-action/init@v3
with:
languages: go
- name: Autobuild
uses: github/codeql-action/autobuild@v4
uses: github/codeql-action/autobuild@v3
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v4
uses: github/codeql-action/analyze@v3
with:
category: 'language:go'

View File

@@ -16,12 +16,12 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
path: __vm
- name: Checkout private code
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
repository: VictoriaMetrics/vmdocs
token: ${{ secrets.VM_BOT_GH_TOKEN }}

View File

@@ -32,11 +32,11 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Setup Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@v5
with:
cache-dependency-path: |
go.sum
@@ -71,11 +71,11 @@ jobs:
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Setup Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@v5
with:
cache-dependency-path: |
go.sum
@@ -97,11 +97,11 @@ jobs:
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Setup Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@v5
with:
cache-dependency-path: |
go.sum

View File

@@ -32,10 +32,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Setup Node
uses: actions/setup-node@v6
uses: actions/setup-node@v4
with:
node-version: '24.x'

View File

@@ -125,15 +125,6 @@ vmutils-linux-ppc64le: \
vmrestore-linux-ppc64le \
vmctl-linux-ppc64le
vmutils-linux-s390x: \
vmagent-linux-s390x \
vmalert-linux-s390x \
vmalert-tool-linux-s390x \
vmauth-linux-s390x \
vmbackup-linux-s390x \
vmrestore-linux-s390x \
vmctl-linux-s390x
vmutils-darwin-amd64: \
vmagent-darwin-amd64 \
vmalert-darwin-amd64 \
@@ -266,7 +257,6 @@ release-victoria-metrics: \
release-victoria-metrics-linux-amd64 \
release-victoria-metrics-linux-arm \
release-victoria-metrics-linux-arm64 \
release-victoria-metrics-linux-s390x \
release-victoria-metrics-darwin-amd64 \
release-victoria-metrics-darwin-arm64 \
release-victoria-metrics-freebsd-amd64 \
@@ -285,9 +275,6 @@ release-victoria-metrics-linux-arm:
release-victoria-metrics-linux-arm64:
GOOS=linux GOARCH=arm64 $(MAKE) release-victoria-metrics-goos-goarch
release-victoria-metrics-linux-s390x:
GOOS=linux GOARCH=s390x $(MAKE) release-victoria-metrics-goos-goarch
release-victoria-metrics-darwin-amd64:
GOOS=darwin GOARCH=amd64 $(MAKE) release-victoria-metrics-goos-goarch
@@ -327,7 +314,6 @@ release-vmutils: \
release-vmutils-linux-amd64 \
release-vmutils-linux-arm64 \
release-vmutils-linux-arm \
release-vmutils-linux-s390x \
release-vmutils-darwin-amd64 \
release-vmutils-darwin-arm64 \
release-vmutils-freebsd-amd64 \
@@ -346,9 +332,6 @@ release-vmutils-linux-arm64:
release-vmutils-linux-arm:
GOOS=linux GOARCH=arm $(MAKE) release-vmutils-goos-goarch
release-vmutils-linux-s390x:
GOOS=linux GOARCH=s390x $(MAKE) release-vmutils-goos-goarch
release-vmutils-darwin-amd64:
GOOS=darwin GOARCH=amd64 $(MAKE) release-vmutils-goos-goarch
@@ -435,7 +418,7 @@ release-vmutils-windows-goarch: \
vmctl-windows-$(GOARCH)-prod.exe
pprof-cpu:
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics $(PPROF_FILE)
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
fmt:
gofmt -l -w -s ./lib
@@ -500,8 +483,7 @@ app-local-windows-goarch:
CGO_ENABLED=0 GOOS=windows GOARCH=$(GOARCH) go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)-windows-$(GOARCH)$(RACE).exe $(PKG_PREFIX)/app/$(APP_NAME)
quicktemplate-gen: install-qtc
qtc -dir=lib
qtc -dir=app
qtc
install-qtc:
which qtc || go install github.com/valyala/quicktemplate/qtc@latest

View File

@@ -3,7 +3,7 @@
[![Latest Release](https://img.shields.io/github/v/release/VictoriaMetrics/VictoriaMetrics?sort=semver&label=&filter=!*-victorialogs&logo=github&labelColor=gray&color=gray&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Freleases%2Flatest)](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
![Docker Pulls](https://img.shields.io/docker/pulls/victoriametrics/victoria-metrics?label=&logo=docker&logoColor=white&labelColor=2496ED&color=2496ED&link=https%3A%2F%2Fhub.docker.com%2Fr%2Fvictoriametrics%2Fvictoria-metrics)
[![Go Report](https://goreportcard.com/badge/github.com/VictoriaMetrics/VictoriaMetrics?link=https%3A%2F%2Fgoreportcard.com%2Freport%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics)](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics)
[![Build Status](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/build.yml/badge.svg?branch=master&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Factions)](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/build.yml)
[![Build Status](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/main.yml/badge.svg?branch=master&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Factions)](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/main.yml)
[![codecov](https://codecov.io/gh/VictoriaMetrics/VictoriaMetrics/branch/master/graph/badge.svg?link=https%3A%2F%2Fcodecov.io%2Fgh%2FVictoriaMetrics%2FVictoriaMetrics)](https://app.codecov.io/gh/VictoriaMetrics/VictoriaMetrics)
[![License](https://img.shields.io/github/license/VictoriaMetrics/VictoriaMetrics?labelColor=green&label=&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Fblob%2Fmaster%2FLICENSE)](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE)
![Slack](https://img.shields.io/badge/Join-4A154B?logo=slack&link=https%3A%2F%2Fslack.victoriametrics.com)

View File

@@ -4,11 +4,12 @@
The following versions of VictoriaMetrics receive regular security fixes:
| Version | Supported |
|--------------------------------------------------------------------------------|--------------------|
| [Latest release](https://docs.victoriametrics.com/victoriametrics/changelog/) | :white_check_mark: |
| [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
| other releases | :x: |
| Version | Supported |
|---------|--------------------|
| [latest release](https://docs.victoriametrics.com/victoriametrics/changelog/) | :white_check_mark: |
| v1.102.x [LTS line](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
| v1.110.x [LTS line](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
| other releases | :x: |
See [this page](https://victoriametrics.com/security/) for more details.

View File

@@ -27,9 +27,6 @@ victoria-metrics-linux-ppc64le-prod:
victoria-metrics-linux-386-prod:
APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-386
victoria-metrics-linux-s390x-prod:
APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-s390x
victoria-metrics-darwin-amd64-prod:
APP_NAME=victoria-metrics $(MAKE) app-via-docker-darwin-amd64

View File

@@ -169,7 +169,7 @@ func usage() {
const s = `
victoria-metrics is a time series database and monitoring solution.
See the docs at https://docs.victoriametrics.com/victoriametrics/
See the docs at https://docs.victoriametrics.com/
`
flagutil.Usage(s)
}

View File

@@ -27,9 +27,6 @@ vmagent-linux-ppc64le-prod:
vmagent-linux-386-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-linux-386
vmagent-linux-s390x-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-linux-s390x
vmagent-darwin-amd64-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-darwin-amd64

View File

@@ -27,7 +27,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/promremotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/vmimport"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/zabbixconnector"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
@@ -75,7 +74,7 @@ var (
"See also -opentsdbHTTPListenAddr.useProxyProtocol")
opentsdbHTTPUseProxyProtocol = flag.Bool("opentsdbHTTPListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted "+
"at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
configAuthKey = flagutil.NewPassword("configAuthKey", "Authorization key for accessing /config and /remotewrite-.*-config pages. It must be passed via authKey query arg. It overrides -httpAuth.*")
configAuthKey = flagutil.NewPassword("configAuthKey", "Authorization key for accessing /config page. It must be passed via authKey query arg. It overrides -httpAuth.*")
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
dryRun = flag.Bool("dryRun", false, "Whether to check config files without running vmagent. The following files are checked: "+
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig, -remoteWrite.streamAggr.config . "+
@@ -253,8 +252,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
{"metric-relabel-debug", "debug metric relabeling"},
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
{"config", "-promscrape.config contents"},
{"remotewrite-relabel-config", "-remoteWrite.relabelConfig contents"},
{"remotewrite-url-relabel-config", "-remoteWrite.urlRelabelConfig contents"},
{"metrics", "available service metrics"},
{"flags", "command-line flags"},
{"-/reload", "reload configuration"},
@@ -351,17 +348,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
firehose.WriteSuccessResponse(w, r)
return true
case "/zabbixconnector/api/v1/history":
zabbixconnectorHistoryRequests.Inc()
if err := zabbixconnector.InsertHandlerForHTTP(nil, r); err != nil {
zabbixconnectorHistoryErrors.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintf(w, `{"error":%q}`, err.Error())
return true
}
w.WriteHeader(http.StatusOK)
return true
case "/newrelic":
newrelicCheckRequest.Inc()
w.Header().Set("Content-Type", "application/json")
@@ -491,42 +477,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
promscrape.WriteConfigData(&bb)
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
return true
case "/remotewrite-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
remotewrite.WriteRelabelConfigData(w)
return true
case "/api/v1/status/remotewrite-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteStatusRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "application/json")
var bb bytesutil.ByteBuffer
remotewrite.WriteRelabelConfigData(&bb)
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
return true
case "/remotewrite-url-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteURLRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
remotewrite.WriteURLRelabelConfigData(w)
return true
case "/api/v1/status/remotewrite-url-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteStatusURLRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "application/json")
var bb bytesutil.ByteBuffer
remotewrite.WriteURLRelabelConfigData(&bb)
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
return true
case "/prometheus/-/reload", "/-/reload":
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) {
return true
@@ -656,17 +606,6 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
}
firehose.WriteSuccessResponse(w, r)
return true
case "zabbixconnector/api/v1/history":
zabbixconnectorHistoryRequests.Inc()
if err := zabbixconnector.InsertHandlerForHTTP(at, r); err != nil {
zabbixconnectorHistoryErrors.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintf(w, `{"error":%q}`, err.Error())
return true
}
w.WriteHeader(http.StatusOK)
return true
case "newrelic":
newrelicCheckRequest.Inc()
w.Header().Set("Content-Type", "application/json")
@@ -788,9 +727,6 @@ var (
opentelemetryPushRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
opentelemetryPushErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
zabbixconnectorHistoryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
zabbixconnectorHistoryErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
newrelicWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
newrelicWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
@@ -811,12 +747,6 @@ var (
promscrapeConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/config"}`)
promscrapeStatusConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/config"}`)
remoteWriteRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/remotewrite-relabel-config"}`)
remoteWriteStatusRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/remotewrite-relabel-config"}`)
remoteWriteURLRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/remotewrite-url-relabel-config"}`)
remoteWriteStatusURLRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/remotewrite-url-relabel-config"}`)
promscrapeConfigReloadRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/-/reload"}`)
)

View File

@@ -2,14 +2,13 @@ package opentelemetry
import (
"fmt"
"io"
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/firehose"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
@@ -25,13 +24,6 @@ var (
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="opentelemetry"}`)
)
// InsertHandler processes metrics from given reader.
func InsertHandlerForReader(at *auth.Token, r io.Reader, encoding string) error {
return stream.ParseStream(r, encoding, nil, func(tss []prompb.TimeSeries, mms []prompb.MetricMetadata) error {
return insertRows(at, tss, mms, nil)
})
}
// InsertHandler processes opentelemetry metrics.
func InsertHandler(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)
@@ -76,7 +68,7 @@ func insertRows(at *auth.Token, tss []prompb.TimeSeries, mms []prompb.MetricMeta
ctx.WriteRequest.Timeseries = tssDst
var metadataTotal int
if prommetadata.IsEnabled() {
if promscrape.IsMetadataEnabled() {
var accountID, projectID uint32
if at != nil {
accountID = at.AccountID

View File

@@ -7,8 +7,8 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
@@ -36,7 +36,7 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
return err
}
encoding := req.Header.Get("Content-Encoding")
return stream.Parse(req.Body, defaultTimestamp, encoding, true, prommetadata.IsEnabled(), func(rows []prometheus.Row, mms []prometheus.Metadata) error {
return stream.Parse(req.Body, defaultTimestamp, encoding, true, promscrape.IsMetadataEnabled(), func(rows []prometheus.Row, mms []prometheus.Metadata) error {
return insertRows(at, rows, mms, extraLabels)
}, func(s string) {
httpserver.LogError(req, s)

View File

@@ -6,8 +6,8 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
@@ -71,7 +71,7 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.Met
ctx.WriteRequest.Timeseries = tssDst
var metadataTotal int
if prommetadata.IsEnabled() {
if promscrape.IsMetadataEnabled() {
var accountID, projectID uint32
if at != nil {
accountID = at.AccountID

View File

@@ -463,6 +463,12 @@ again:
// - Real-world implementations of v1 use both 400 and 415 status codes.
// See more in research: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8462#issuecomment-2786918054
case 415, 400:
if c.canDowngradeVMProto.Swap(false) {
logger.Infof("received unsupported media type or bad request from remote storage at %q. Downgrading protocol from VictoriaMetrics to Prometheus remote write for all future requests. "+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)
c.useVMProto.Store(false)
}
if encoding.IsZstd(block) {
logger.Infof("received unsupported media type or bad request from remote storage at %q. Re-packing the block to Prometheus remote write and retrying."+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)

View File

@@ -93,7 +93,10 @@ func TestParseRetryAfterHeader(t *testing.T) {
// helper calculate the max possible time duration calculated by timeutil.AddJitterToDuration.
func helper(d time.Duration) time.Duration {
dv := min(d/10, 10*time.Second)
dv := d / 10
if dv > 10*time.Second {
dv = 10 * time.Second
}
return d + dv
}

View File

@@ -3,18 +3,15 @@ package remotewrite
import (
"flag"
"fmt"
"io"
"strconv"
"strings"
"sync"
"sync/atomic"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"go.yaml.in/yaml/v3"
"github.com/VictoriaMetrics/metrics"
)
@@ -35,12 +32,9 @@ var (
"See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels")
)
var labelsGlobal []prompb.Label
var (
labelsGlobal []prompb.Label
remoteWriteRelabelConfigData atomic.Pointer[[]byte]
remoteWriteURLRelabelConfigData atomic.Pointer[[]interface{}]
relabelConfigReloads *metrics.Counter
relabelConfigReloadErrors *metrics.Counter
relabelConfigSuccess *metrics.Gauge
@@ -73,42 +67,6 @@ func initRelabelConfigs() {
}
}
// WriteRelabelConfigData writes -remoteWrite.relabelConfig contents to w
func WriteRelabelConfigData(w io.Writer) {
p := remoteWriteRelabelConfigData.Load()
if p == nil {
// Nothing to write to w
return
}
_, _ = w.Write(*p)
}
// WriteURLRelabelConfigData writes -remoteWrite.urlRelabelConfig contents to w
func WriteURLRelabelConfigData(w io.Writer) {
p := remoteWriteURLRelabelConfigData.Load()
if p == nil {
// Nothing to write to w
return
}
type urlRelabelCfg struct {
Url string `yaml:"url"`
RelabelConfig interface{} `yaml:"relabel_config"`
}
var cs []urlRelabelCfg
for i, url := range *remoteWriteURLs {
cfgData := (*p)[i]
if !*showRemoteWriteURL {
url = fmt.Sprintf("%d:secret-url", i+1)
}
cs = append(cs, urlRelabelCfg{
Url: url,
RelabelConfig: cfgData,
})
}
d, _ := yaml.Marshal(cs)
_, _ = w.Write(d)
}
func reloadRelabelConfigs() {
rcs := allRelabelConfigs.Load()
if !rcs.isSet() {
@@ -132,42 +90,28 @@ func reloadRelabelConfigs() {
func loadRelabelConfigs() (*relabelConfigs, error) {
var rcs relabelConfigs
if *relabelConfigPathGlobal != "" {
global, rawCfg, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
global, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
if err != nil {
return nil, fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %w", *relabelConfigPathGlobal, err)
}
remoteWriteRelabelConfigData.Store(&rawCfg)
rcs.global = global
}
if len(*relabelConfigPaths) > len(*remoteWriteURLs) {
return nil, fmt.Errorf("too many -remoteWrite.urlRelabelConfig args: %d; it mustn't exceed the number of -remoteWrite.url args: %d",
len(*relabelConfigPaths), (len(*remoteWriteURLs)))
}
var urlRelabelCfgs []interface{}
rcs.perURL = make([]*promrelabel.ParsedConfigs, len(*remoteWriteURLs))
for i, path := range *relabelConfigPaths {
if len(path) == 0 {
urlRelabelCfgs = append(urlRelabelCfgs, nil)
// Skip empty relabel config.
continue
}
prc, rawCfg, err := promrelabel.LoadRelabelConfigs(path)
prc, err := promrelabel.LoadRelabelConfigs(path)
if err != nil {
return nil, fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %w", path, err)
}
rcs.perURL[i] = prc
var parsedCfg interface{}
_ = yaml.Unmarshal(rawCfg, &parsedCfg)
urlRelabelCfgs = append(urlRelabelCfgs, parsedCfg)
}
if len(*remoteWriteURLs) > len(*relabelConfigPaths) {
// fill the urlRelabelCfgs with empty relabel configs if not set
for i := len(*relabelConfigPaths); i < len(*remoteWriteURLs); i++ {
urlRelabelCfgs = append(urlRelabelCfgs, nil)
}
}
remoteWriteURLRelabelConfigData.Store(&urlRelabelCfgs)
return &rcs, nil
}

View File

@@ -27,7 +27,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
"github.com/VictoriaMetrics/metrics"
@@ -486,9 +485,6 @@ func tryPush(at *auth.Token, wr *prompb.WriteRequest, forceDropSamplesOnFailure
matchIdxs.B = sas.Push(tssBlock, matchIdxs.B)
if !*streamAggrGlobalKeepInput {
tssBlock = dropAggregatedSeries(tssBlock, matchIdxs.B, *streamAggrGlobalDropInput)
} else if *streamAggrGlobalDropInput {
// if both keep_input and drop_input are true, we keep only the aggregated series
tssBlock = dropUnaggregatedSeries(tssBlock, matchIdxs.B)
}
matchIdxsPool.Put(matchIdxs)
}
@@ -992,17 +988,7 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
tss = append(*v, tss...)
}
tss = dropAggregatedSeries(tss, matchIdxs.B, rwctx.streamAggrDropInput)
} else if rwctx.streamAggrDropInput {
// if both keep_input and drop_input are true, we keep only the aggregated series
if rctx == nil {
rctx = getRelabelCtx()
// Make a copy of tss before dropping aggregated series
v = tssPool.Get().(*[]prompb.TimeSeries)
tss = append(*v, tss...)
}
tss = dropUnaggregatedSeries(tss, matchIdxs.B)
}
matchIdxsPool.Put(matchIdxs)
}
if rwctx.deduplicator != nil {
@@ -1025,10 +1011,9 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
return false
}
var matchIdxsPool slicesutil.BufferPool[uint32]
var matchIdxsPool bytesutil.ByteBufferPool
// dropAggregatedSeries drops matched series, also the unmatched if dropInput is true.
func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32, dropInput bool) []prompb.TimeSeries {
func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []byte, dropInput bool) []prompb.TimeSeries {
dst := src[:0]
if !dropInput {
for i, match := range matchIdxs {
@@ -1043,20 +1028,6 @@ func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32, dropInput
return dst
}
// dropUnaggregatedSeries drops unmatched series.
func dropUnaggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32) []prompb.TimeSeries {
dst := src[:0]
for i, match := range matchIdxs {
if match == 0 {
continue
}
dst = append(dst, src[i])
}
tail := src[len(dst):]
clear(tail)
return dst
}
func (rwctx *remoteWriteCtx) pushInternalTrackDropped(tss []prompb.TimeSeries) {
if rwctx.tryPushTimeSeriesInternal(tss) {
return

View File

@@ -10,8 +10,6 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/consistenthash"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
@@ -59,8 +57,8 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
f(10)
}
func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
f := func(streamAggrConfig, relabelConfig string, enableWindows bool, dedupInterval time.Duration, keepInput, dropInput bool, input string, expectedRowsPushedAfterRelabel, expectedPushedSample int) {
func TestRemoteWriteContext_TryPush_ImmutableTimeseries(t *testing.T) {
f := func(streamAggrConfig, relabelConfig string, enableWindows bool, dedupInterval time.Duration, keepInput, dropInput bool, input string) {
t.Helper()
perURLRelabel, err := promrelabel.ParseRelabelConfigsData([]byte(relabelConfig))
if err != nil {
@@ -73,16 +71,10 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
}
allRelabelConfigs.Store(rcs)
path := "fast-queue-write-test"
fs.MustRemoveDir(path)
fq := persistentqueue.MustOpenFastQueue(path, "test", 100, 0, false)
defer fs.MustRemoveDir(path)
defer fq.MustClose()
pss := make([]*pendingSeries, 1)
isVMProto := &atomic.Bool{}
isVMProto.Store(true)
pss[0] = newPendingSeries(fq, isVMProto, 0, 100)
pss[0] = newPendingSeries(nil, isVMProto, 0, 100)
rwctx := &remoteWriteCtx{
idx: 0,
streamAggrKeepInput: keepInput,
@@ -91,8 +83,6 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(`foo`),
rowsDroppedByRelabel: metrics.GetOrCreateCounter(`bar`),
}
defer metrics.UnregisterAllMetrics()
if dedupInterval > 0 {
rwctx.deduplicator = streamaggr.NewDeduplicator(nil, enableWindows, dedupInterval, nil, "dedup-global")
}
@@ -114,27 +104,23 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
inputTss := prometheus.MustParsePromMetrics(input, offsetMsecs)
expectedTss := make([]prompb.TimeSeries, len(inputTss))
// check inputTss is not modified after TryPushTimeSeries
// copy inputTss to make sure it is not mutated during TryPush call
copy(expectedTss, inputTss)
if !rwctx.TryPushTimeSeries(inputTss, false) {
t.Fatalf("cannot push samples to rwctx")
}
if int(rwctx.rowsPushedAfterRelabel.Get()) != expectedRowsPushedAfterRelabel {
t.Fatalf("unexpected number of rows after relabel; got %d; want %d", rwctx.rowsPushedAfterRelabel.Get(), expectedRowsPushedAfterRelabel)
}
if len(pss[0].wr.tss) != expectedPushedSample {
t.Fatalf("unexpected number of pushed samples; got %d; want %d", len(pss[0].wr.tss), expectedPushedSample)
}
if !reflect.DeepEqual(expectedTss, inputTss) {
t.Fatalf("unexpected samples;\ngot\n%v\nwant\n%v", inputTss, expectedTss)
}
}
// relabeling
f(``, `
f(`
- interval: 1m
outputs: [sum_samples]
- interval: 2m
outputs: [count_series]
`, `
- action: keep
source_labels: [env]
regex: "dev"
@@ -143,66 +129,53 @@ metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 2, 2)
// relabeling + aggregation
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, `
- action: keep
source_labels: [env]
regex: ".*"
`, false, 0, false, false, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 4, 2)
// aggregation + keepInput
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, ``, false, 0, true, false, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 4, 4)
// aggregation + dropInput
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, ``, false, 0, false, true, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 4, 0)
// aggregation + keepInput + dropInput
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, ``, false, 0, true, true, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="bar"} 25
`, 3, 1)
// aggregation + deduplication
`)
f(``, ``, true, time.Hour, false, false, `
metric{env="dev"} 10
metric{env="foo"} 20
metric{env="dev"} 15
metric{env="foo"} 25
`, 4, 0)
`)
f(``, `
- action: keep
source_labels: [env]
regex: "dev"
`, true, time.Hour, false, false, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`)
f(``, `
- action: keep
source_labels: [env]
regex: "dev"
`, true, time.Hour, true, false, `
metric{env="test"} 10
metric{env="dev"} 20
metric{env="foo"} 15
metric{env="dev"} 25
`)
f(``, `
- action: keep
source_labels: [env]
regex: "dev"
`, true, time.Hour, false, true, `
metric{env="foo"} 10
metric{env="dev"} 20
metric{env="foo"} 15
metric{env="dev"} 25
`)
f(``, `
- action: keep
source_labels: [env]
regex: "dev"
`, true, time.Hour, true, true, `
metric{env="dev"} 10
metric{env="test"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`)
}
func TestShardAmountRemoteWriteCtx(t *testing.T) {

View File

@@ -18,12 +18,12 @@ var (
streamAggrGlobalConfig = flag.String("streamAggr.config", "", "Optional path to file with stream aggregation config. "+
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
"See also -streamAggr.keepInput, -streamAggr.dropInput and -streamAggr.dedupInterval")
streamAggrGlobalKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep input samples that match any rule in "+
"-streamAggr.config. By default, matched raw samples are aggregated and dropped, while unmatched samples "+
"are written to the remote storage. See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrGlobalDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop input samples that not matching any rule in "+
"-streamAggr.config. By default, only matched raw samples are dropped, while unmatched samples "+
"are written to the remote storage. See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrGlobalKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep all the input samples after the aggregation "+
"with -streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples "+
"are written to remote storages write. See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrGlobalDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop all the input samples after the aggregation "+
"with -remoteWrite.streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples "+
"are written to remote storages write. See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrGlobalDedupInterval = flag.Duration("streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval on "+
"aggregator before optional aggregation with -streamAggr.config . "+
"See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
@@ -43,11 +43,11 @@ var (
streamAggrConfig = flagutil.NewArrayString("remoteWrite.streamAggr.config", "Optional path to file with stream aggregation config for the corresponding -remoteWrite.url. "+
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
"See also -remoteWrite.streamAggr.keepInput, -remoteWrite.streamAggr.dropInput and -remoteWrite.streamAggr.dedupInterval")
streamAggrDropInput = flagutil.NewArrayBool("remoteWrite.streamAggr.dropInput", "Whether to drop input samples that not matching any rule in "+
"the corresponding -remoteWrite.streamAggr.config. By default, only matched raw samples are dropped, while unmatched samples "+
streamAggrDropInput = flagutil.NewArrayBool("remoteWrite.streamAggr.dropInput", "Whether to drop all the input samples after the aggregation "+
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. By default, only aggregates samples are dropped, while the remaining samples "+
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep input samples that match any rule in "+
"the corresponding -remoteWrite.streamAggr.config. By default, matched raw samples are aggregated and dropped, while unmatched samples "+
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep all the input samples after the aggregation "+
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. By default, only aggregates samples are dropped, while the remaining samples "+
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before optional aggregation "+
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")

View File

@@ -1,80 +0,0 @@
package zabbixconnector
import (
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="zabbixconnector"}`)
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="zabbixconnector"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="zabbixconnector"}`)
)
// InsertHandlerForHTTP processes remote write for ZabbixConnector POST /zabbixconnector/v1/history request.
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)
if err != nil {
return err
}
encoding := req.Header.Get("Content-Encoding")
return stream.Parse(req.Body, encoding, func(rows []zabbixconnector.Row) error {
return insertRows(at, rows, extraLabels)
})
}
func insertRows(at *auth.Token, rows []zabbixconnector.Row, extraLabels []prompb.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
rowsTotal := len(rows)
tssDst := ctx.WriteRequest.Timeseries[:0]
labels := ctx.Labels[:0]
samples := ctx.Samples[:0]
for i := range rows {
r := &rows[i]
labelsLen := len(labels)
for j := range r.Tags {
tag := &r.Tags[j]
labels = append(labels, prompb.Label{
Name: bytesutil.ToUnsafeString(tag.Key),
Value: bytesutil.ToUnsafeString(tag.Value),
})
}
labels = append(labels, extraLabels...)
samplesLen := len(samples)
samples = append(samples, prompb.Sample{
Value: r.Value,
Timestamp: r.Timestamp,
})
tssDst = append(tssDst, prompb.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[samplesLen:],
})
}
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
rowsInserted.Add(rowsTotal)
if at != nil {
rowsTenantInserted.Get(at).Add(rowsTotal)
}
rowsPerInsert.Update(float64(rowsTotal))
return nil
}

View File

@@ -27,9 +27,6 @@ vmalert-tool-linux-ppc64le-prod:
vmalert-tool-linux-386-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-386
vmalert-tool-linux-s390x-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-s390x
vmalert-tool-darwin-amd64-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-darwin-amd64

View File

@@ -34,7 +34,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
"github.com/VictoriaMetrics/metrics"
)
@@ -85,8 +84,7 @@ func UnitTest(files []string, disableGroupLabel bool, externalLabels []string, e
defer server.Close()
} else {
httpListenAddr = httpListenPort
ln, err := net.Listen(netutil.GetTCPNetwork(), fmt.Sprintf(":%s", httpListenPort))
ln, err := net.Listen("tcp", fmt.Sprintf(":%s", httpListenPort))
if err != nil {
logger.Fatalf("cannot listen on port %s: %v", httpListenPort, err)
}
@@ -132,7 +130,7 @@ func UnitTest(files []string, disableGroupLabel bool, externalLabels []string, e
}
labels[s[:n]] = s[n+1:]
}
err = notifier.Init(labels, externalURL)
_, err = notifier.Init(nil, labels, externalURL)
if err != nil {
logger.Fatalf("failed to init notifier: %v", err)
}
@@ -379,7 +377,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
if len(g.Rules) == 0 {
continue
}
errs := g.ExecOnce(context.Background(), rw, ts)
errs := g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, rw, ts)
for err := range errs {
if err != nil {
checkErrs = append(checkErrs, fmt.Errorf("\nfailed to exec group: %q, time: %s, err: %w", g.Name,

View File

@@ -27,9 +27,6 @@ vmalert-linux-ppc64le-prod:
vmalert-linux-386-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-linux-386
vmalert-linux-s390x-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-linux-s390x
vmalert-darwin-amd64-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-darwin-amd64

View File

@@ -31,7 +31,7 @@ type Group struct {
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
EvalDelay *promutil.Duration `yaml:"eval_delay,omitempty"`
Limit *int `yaml:"limit,omitempty"`
Limit int `yaml:"limit,omitempty"`
Rules []Rule `yaml:"rules"`
Concurrency int `yaml:"concurrency"`
// Labels is a set of label value pairs, that will be added to every rule.
@@ -91,8 +91,8 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
if g.EvalOffset != nil && g.EvalDelay != nil {
return fmt.Errorf("eval_offset cannot be used with eval_delay")
}
if g.Limit != nil && *g.Limit < 0 {
return fmt.Errorf("invalid limit %d, shouldn't be less than 0", *g.Limit)
if g.Limit < 0 {
return fmt.Errorf("invalid limit %d, shouldn't be less than 0", g.Limit)
}
if g.Concurrency < 0 {
return fmt.Errorf("invalid concurrency %d, shouldn't be less than 0", g.Concurrency)

View File

@@ -116,7 +116,7 @@ func TestParse_Failure(t *testing.T) {
f([]string{"testdata/rules/rules_interval_bad.rules"}, "eval_offset should be smaller than interval")
f([]string{"testdata/rules/rules0-bad.rules"}, "unexpected token")
f([]string{"testdata/dir/rules0-bad.rules"}, "invalid annotations")
f([]string{"testdata/dir/rules0-bad.rules"}, "error parsing annotation")
f([]string{"testdata/dir/rules1-bad.rules"}, "duplicate in file")
f([]string{"testdata/dir/rules2-bad.rules"}, "function \"unknown\" not defined")
f([]string{"testdata/dir/rules3-bad.rules"}, "either `record` or `alert` must be set")
@@ -181,10 +181,9 @@ func TestGroupValidate_Failure(t *testing.T) {
EvalOffset: promutil.NewDuration(2 * time.Minute),
}, false, "eval_offset should be smaller than interval")
limit := -1
f(&Group{
Name: "wrong limit",
Limit: &limit,
Limit: -1,
}, false, "invalid limit")
f(&Group{
@@ -343,6 +342,7 @@ func TestGroupValidate_Failure(t *testing.T) {
},
},
}, true, "bad prometheus expr")
}
func TestGroupValidate_Success(t *testing.T) {

View File

@@ -173,26 +173,22 @@ func (c *Client) Query(ctx context.Context, query string, ts time.Time) (Result,
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
}
}
defer func() { _ = resp.Body.Close() }()
// Process the received response.
var parseFn func(resp *http.Response) (Result, error)
var parseFn func(req *http.Request, resp *http.Response) (Result, error)
switch c.dataSourceType {
case datasourcePrometheus:
parseFn = parsePrometheusInstantResponse
parseFn = parsePrometheusResponse
case datasourceGraphite:
parseFn = parseGraphiteResponse
case datasourceVLogs:
parseFn = parseVLogsInstantResponse
parseFn = parseVLogsResponse
default:
logger.Panicf("BUG: unsupported datasource type %q to parse query response", c.dataSourceType)
}
result, err := parseFn(resp)
if err != nil {
return Result{}, nil, fmt.Errorf("error parsing response from %q: %w", req.URL.Redacted(), err)
}
return result, req, nil
result, err := parseFn(req, resp)
_ = resp.Body.Close()
return result, req, err
}
// QueryRange executes the given query on the given time range.
@@ -233,23 +229,19 @@ func (c *Client) QueryRange(ctx context.Context, query string, start, end time.T
return res, fmt.Errorf("second attempt: %w", err)
}
}
defer func() { _ = resp.Body.Close() }()
// Process the received response.
var parseFn func(resp *http.Response) (Result, error)
var parseFn func(req *http.Request, resp *http.Response) (Result, error)
switch c.dataSourceType {
case datasourcePrometheus:
parseFn = parsePrometheusRangeResponse
parseFn = parsePrometheusResponse
case datasourceVLogs:
parseFn = parseVLogsRangeResponse
parseFn = parseVLogsResponse
default:
logger.Panicf("BUG: unsupported datasource type %q to parse query range response", c.dataSourceType)
}
res, err = parseFn(resp)
if err != nil {
return Result{}, fmt.Errorf("error parsing response from %q: %w", req.URL.Redacted(), err)
}
res, err = parseFn(req, resp)
_ = resp.Body.Close()
return res, err
}

View File

@@ -33,10 +33,10 @@ func (r graphiteResponse) metrics() []Metric {
return ms
}
func parseGraphiteResponse(resp *http.Response) (Result, error) {
func parseGraphiteResponse(req *http.Request, resp *http.Response) (Result, error) {
r := &graphiteResponse{}
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
return Result{}, fmt.Errorf("error parsing graphite metrics: %w", err)
return Result{}, fmt.Errorf("error parsing graphite metrics for %s: %w", req.URL.Redacted(), err)
}
return Result{Data: r.metrics()}, nil
}

View File

@@ -172,26 +172,17 @@ const (
rtVector, rtMatrix, rScalar = "vector", "matrix", "scalar"
)
func parsePromResponse(resp *http.Response) (*promResponse, error) {
func parsePrometheusResponse(req *http.Request, resp *http.Response) (res Result, err error) {
r := &promResponse{}
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
return nil, fmt.Errorf("failed to decode response: %w", err)
if err = json.NewDecoder(resp.Body).Decode(r); err != nil {
return res, fmt.Errorf("error parsing response from %s: %w", req.URL.Redacted(), err)
}
if r.Status == statusError {
return nil, fmt.Errorf("response error %q: %s", r.ErrorType, r.Error)
return res, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL.Redacted(), r.ErrorType, r.Error)
}
if r.Status != statusSuccess {
return nil, fmt.Errorf("unknown response status %q", r.Status)
return res, fmt.Errorf("unknown status: %s, Expected success or error", r.Status)
}
return r, nil
}
func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error) {
r, err := parsePromResponse(resp)
if err != nil {
return res, fmt.Errorf("failed to parse response: %w", err)
}
var parseFn func() ([]Metric, error)
switch r.Data.ResultType {
case rtVector:
@@ -200,6 +191,12 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
return res, fmt.Errorf("unmarshal err %w; \n %#v", err, string(r.Data.Result))
}
parseFn = pi.metrics
case rtMatrix:
var pr promRange
if err := json.Unmarshal(r.Data.Result, &pr.Result); err != nil {
return res, err
}
parseFn = pr.metrics
case rScalar:
var ps promScalar
if err := json.Unmarshal(r.Data.Result, &ps); err != nil {
@@ -209,6 +206,7 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
default:
return res, fmt.Errorf("unknown result type %q", r.Data.ResultType)
}
ms, err := parseFn()
if err != nil {
return res, err
@@ -224,34 +222,6 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
return res, nil
}
func parsePrometheusRangeResponse(resp *http.Response) (res Result, err error) {
r, err := parsePromResponse(resp)
if err != nil {
return res, fmt.Errorf("failed to parse response: %w", err)
}
if r.Data.ResultType != rtMatrix {
return res, fmt.Errorf("unexpected result type %q; expected result type %q", r.Data.ResultType, rtMatrix)
}
var pr promRange
if err := json.Unmarshal(r.Data.Result, &pr.Result); err != nil {
return res, err
}
ms, err := pr.metrics()
if err != nil {
return res, err
}
res = Result{Data: ms, IsPartial: r.IsPartial}
if r.Stats.SeriesFetched != nil {
intV, err := strconv.Atoi(*r.Stats.SeriesFetched)
if err != nil {
return res, fmt.Errorf("failed to convert stats.seriesFetched to int: %w", err)
}
res.SeriesFetched = &intV
}
return res, nil
}
func (c *Client) setPrometheusInstantReqParams(r *http.Request, query string, timestamp time.Time) {
if c.appendTypePrefix {
r.URL.Path += "/prometheus"

View File

@@ -65,23 +65,21 @@ func TestVMInstantQuery(t *testing.T) {
case 3:
w.Write([]byte(`{"status":"unknown"}`))
case 4:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector"}}`))
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix"}}`))
case 5:
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
case 6:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"vm_rows","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
case 7:
case 6:
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
case 8:
case 7:
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]},"stats":{"seriesFetched": "42"}}`))
case 9:
case 8:
w.Write([]byte(`{"status":"success", "isPartial":true, "data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
}
})
mux.HandleFunc("/render", func(w http.ResponseWriter, _ *http.Request) {
c++
switch c {
case 10:
case 9:
w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`))
}
})
@@ -104,9 +102,9 @@ func TestVMInstantQuery(t *testing.T) {
t.Fatalf("failed to parse 'time' query param %q: %s", timeParam, err)
}
switch c {
case 11:
case 10:
w.Write([]byte("[]"))
case 12:
case 11:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"total","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"total","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
}
})
@@ -125,7 +123,6 @@ func TestVMInstantQuery(t *testing.T) {
ts := time.Now()
expErr := func(query, err string) {
t.Helper()
_, _, gotErr := pq.Query(ctx, query, ts)
if gotErr == nil {
t.Fatalf("expected %q got nil", err)
@@ -138,11 +135,10 @@ func TestVMInstantQuery(t *testing.T) {
expErr(vmQuery, "500") // 0
expErr(vmQuery, "error parsing response") // 1
expErr(vmQuery, "response error") // 2
expErr(vmQuery, "unknown response status") // 3
expErr(vmQuery, "unknown status") // 3
expErr(vmQuery, "unexpected end of JSON input") // 4
expErr(vmQuery, "unknown result type") // 5
res, _, err := pq.Query(ctx, vmQuery, ts) // 6 - vector
res, _, err := pq.Query(ctx, vmQuery, ts) // 5 - vector
if err != nil {
t.Fatalf("unexpected %s", err)
}
@@ -163,7 +159,7 @@ func TestVMInstantQuery(t *testing.T) {
}
metricsEqual(t, res.Data, expected)
res, req, err := pq.Query(ctx, vmQuery, ts) // 7 - scalar
res, req, err := pq.Query(ctx, vmQuery, ts) // 6 - scalar
if err != nil {
t.Fatalf("unexpected %s", err)
}
@@ -188,7 +184,7 @@ func TestVMInstantQuery(t *testing.T) {
res.SeriesFetched)
}
res, _, err = pq.Query(ctx, vmQuery, ts) // 8 - scalar with stats
res, _, err = pq.Query(ctx, vmQuery, ts) // 7 - scalar with stats
if err != nil {
t.Fatalf("unexpected %s", err)
}
@@ -209,7 +205,7 @@ func TestVMInstantQuery(t *testing.T) {
*res.SeriesFetched)
}
res, _, err = pq.Query(ctx, vmQuery, ts) // 9
res, _, err = pq.Query(ctx, vmQuery, ts) // 8
if err != nil {
t.Fatalf("unexpected %s", err)
}
@@ -220,7 +216,7 @@ func TestVMInstantQuery(t *testing.T) {
// test graphite
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
res, _, err = gq.Query(ctx, queryRender, ts) // 10 - graphite
res, _, err = gq.Query(ctx, queryRender, ts) // 9 - graphite
if err != nil {
t.Fatalf("unexpected %s", err)
}
@@ -240,9 +236,9 @@ func TestVMInstantQuery(t *testing.T) {
vlogs := datasourceVLogs
pq = s.BuildWithParams(QuerierParams{DataSourceType: string(vlogs), EvaluationInterval: 15 * time.Second})
expErr(vlogsQuery, "error parsing response") // 11
expErr(vlogsQuery, "error parsing response") // 10
res, _, err = pq.Query(ctx, vlogsQuery, ts) // 12
res, _, err = pq.Query(ctx, vlogsQuery, ts) // 11
if err != nil {
t.Fatalf("unexpected %s", err)
}
@@ -394,8 +390,6 @@ func TestVMRangeQuery(t *testing.T) {
switch c {
case 0:
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
case 1:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[1583786142, "1"]}}`))
}
})
mux.HandleFunc("/select/logsql/stats_query_range", func(w http.ResponseWriter, r *http.Request) {
@@ -428,7 +422,7 @@ func TestVMRangeQuery(t *testing.T) {
t.Fatalf("expected 'step' query param to be 60s; got %q instead", step)
}
switch c {
case 2:
case 1:
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"total"},"values":[[1583786142,"10"]]}]}}`))
}
})
@@ -452,13 +446,13 @@ func TestVMRangeQuery(t *testing.T) {
start, end := time.Now().Add(-time.Minute), time.Now()
res, err := pq.QueryRange(ctx, vmQuery, start, end) // case 0
res, err := pq.QueryRange(ctx, vmQuery, start, end)
if err != nil {
t.Fatalf("unexpected %s", err)
}
m := res.Data
if len(m) != 1 {
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
}
expected := Metric{
Labels: []prompb.Label{{Value: "vm_rows", Name: "__name__"}},
@@ -469,9 +463,6 @@ func TestVMRangeQuery(t *testing.T) {
t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
}
_, err = pq.QueryRange(ctx, vmQuery, start, end) // case 1
expectError(t, err, "unexpected result type")
// test unsupported graphite
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})

View File

@@ -40,28 +40,8 @@ func (c *Client) setVLogsRangeReqParams(r *http.Request, query string, start, en
c.setReqParams(r, query)
}
func parseVLogsInstantResponse(resp *http.Response) (res Result, err error) {
res, err = parsePrometheusInstantResponse(resp)
if err != nil {
return Result{}, err
}
for i := range res.Data {
m := &res.Data[i]
for j := range m.Labels {
// reserve the stats func result name with a new label `stats_result` instead of dropping it,
// since there could be multiple stats results in a single query, for instance:
// _time:5m | stats quantile(0.5, request_duration_seconds) p50, quantile(0.9, request_duration_seconds) p90
if m.Labels[j].Name == "__name__" {
m.Labels[j].Name = "stats_result"
break
}
}
}
return
}
func parseVLogsRangeResponse(resp *http.Response) (res Result, err error) {
res, err = parsePrometheusRangeResponse(resp)
func parseVLogsResponse(req *http.Request, resp *http.Response) (res Result, err error) {
res, err = parsePrometheusResponse(req, resp)
if err != nil {
return Result{}, err
}

View File

@@ -132,7 +132,10 @@ func (ls Labels) String() string {
// a=[]Label{{Name: "a", Value: "2"}},b=[]Label{{Name: "a", Value: "1"}}, return 1
// a=[]Label{{Name: "a", Value: "1"}},b=[]Label{{Name: "a", Value: "1"}}, return 0
func LabelCompare(a, b Labels) int {
l := min(len(b), len(a))
l := len(a)
if len(b) < l {
l = len(b)
}
for i := 0; i < l; i++ {
if a[i].Name != b[i].Name {

View File

@@ -7,6 +7,7 @@ import (
"net/url"
"os"
"sort"
"strconv"
"strings"
"sync"
"time"
@@ -76,13 +77,14 @@ absolute path to all .tpl files in root.
`Link to VMUI: -external.alert.source='vmui/#/?g0.expr={{.Expr|queryEscape}}'. `+
`If empty 'vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}' is used.`)
externalLabels = flagutil.NewArrayString("external.label", "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. "+
"In case of conflicts, original labels are kept with prefix 'exported_'.")
"In case of conflicts, original labels are kept with prefix `exported_`.")
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.")
)
var (
extURL *url.URL
alertURLGeneratorFn notifier.AlertURLGenerator
extURL *url.URL
)
func main() {
@@ -119,7 +121,7 @@ func main() {
return
}
err = notifier.InitAlertURLGeneratorFn(extURL, *externalAlertSource, *validateTemplates)
alertURLGeneratorFn, err = getAlertURLGenerator(extURL, *externalAlertSource, *validateTemplates)
if err != nil {
logger.Fatalf("failed to init `external.alert.source`: %s", err)
}
@@ -226,13 +228,14 @@ func newManager(ctx context.Context) (*manager, error) {
labels[s[:n]] = s[n+1:]
}
err = notifier.Init(labels, *externalURL)
nts, err := notifier.Init(alertURLGeneratorFn, labels, *externalURL)
if err != nil {
return nil, fmt.Errorf("failed to init notifier: %w", err)
}
manager := &manager{
groups: make(map[uint64]*rule.Group),
querierBuilder: q,
notifiers: nts,
labels: labels,
}
rw, err := remotewrite.Init(ctx)
@@ -289,6 +292,35 @@ func getHostnameAsExternalURL(addr string, isSecure bool) (*url.URL, error) {
return url.Parse(fmt.Sprintf("%s%s%s", schema, hname, port))
}
func getAlertURLGenerator(externalURL *url.URL, externalAlertSource string, validateTemplate bool) (notifier.AlertURLGenerator, error) {
if externalAlertSource == "" {
return func(a notifier.Alert) string {
gID, aID := strconv.FormatUint(a.GroupID, 10), strconv.FormatUint(a.ID, 10)
return fmt.Sprintf("%s/vmalert/alert?%s=%s&%s=%s", externalURL, paramGroupID, gID, paramAlertID, aID)
}, nil
}
if validateTemplate {
if err := notifier.ValidateTemplates(map[string]string{
"tpl": externalAlertSource,
}); err != nil {
return nil, fmt.Errorf("error validating source template %s: %w", externalAlertSource, err)
}
}
m := map[string]string{
"tpl": externalAlertSource,
}
return func(alert notifier.Alert) string {
qFn := func(_ string) ([]datasource.Metric, error) {
return nil, fmt.Errorf("`query` template isn't supported for alert source template")
}
templated, err := alert.ExecTemplate(qFn, alert.Labels, m)
if err != nil {
logger.Errorf("cannot template alert source: %s", err)
}
return fmt.Sprintf("%s/%s", externalURL, templated["tpl"])
}, nil
}
func usage() {
const s = `
vmalert processes alerts and recording rules.

View File

@@ -49,6 +49,30 @@ func TestGetExternalURL(t *testing.T) {
}
}
func TestGetAlertURLGenerator(t *testing.T) {
testAlert := notifier.Alert{GroupID: 42, ID: 2, Value: 4, Labels: map[string]string{"tenant": "baz"}}
u, _ := url.Parse("https://victoriametrics.com/path")
fn, err := getAlertURLGenerator(u, "", false)
if err != nil {
t.Fatalf("unexpected error %s", err)
}
exp := fmt.Sprintf("https://victoriametrics.com/path/vmalert/alert?%s=42&%s=2", paramGroupID, paramAlertID)
if exp != fn(testAlert) {
t.Fatalf("unexpected url want %s, got %s", exp, fn(testAlert))
}
_, err = getAlertURLGenerator(nil, "foo?{{invalid}}", true)
if err == nil {
t.Fatalf("expected template validation error got nil")
}
fn, err = getAlertURLGenerator(u, "foo?query={{$value}}&ds={{ $labels.tenant }}", true)
if err != nil {
t.Fatalf("unexpected error %s", err)
}
if exp := "https://victoriametrics.com/path/foo?query=4&ds=baz"; exp != fn(testAlert) {
t.Fatalf("unexpected url want %s, got %s", exp, fn(testAlert))
}
}
func TestConfigReload(t *testing.T) {
originalRulePath := *rulePath
originalExternalURL := extURL
@@ -96,10 +120,9 @@ groups:
querierBuilder: &datasource.FakeQuerier{},
groups: make(map[uint64]*rule.Group),
labels: map[string]string{},
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
rw: &remotewrite.Client{},
}
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
syncCh := make(chan struct{})
sighupCh := procutil.NewSighupChan()

View File

@@ -3,7 +3,6 @@ package main
import (
"context"
"fmt"
"strconv"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
@@ -17,6 +16,7 @@ import (
// manager controls group states
type manager struct {
querierBuilder datasource.QuerierBuilder
notifiers func() []notifier.Notifier
rw remotewrite.RWClient
// remote read builder.
@@ -29,8 +29,25 @@ type manager struct {
groups map[uint64]*rule.Group
}
// groupAPI generates apiGroup object from group by its ID(hash)
func (m *manager) groupAPI(gID uint64) (*rule.ApiGroup, error) {
// ruleAPI generates apiRule object from alert by its ID(hash)
func (m *manager) ruleAPI(gID, rID uint64) (apiRule, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
g, ok := m.groups[gID]
if !ok {
return apiRule{}, fmt.Errorf("can't find group with id %d", gID)
}
for _, rule := range g.Rules {
if rule.ID() == rID {
return ruleToAPI(rule), nil
}
}
return apiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
}
// alertAPI generates apiAlert object from alert by its ID(hash)
func (m *manager) alertAPI(gID, aID uint64) (*apiAlert, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
@@ -38,47 +55,13 @@ func (m *manager) groupAPI(gID uint64) (*rule.ApiGroup, error) {
if !ok {
return nil, fmt.Errorf("can't find group with id %d", gID)
}
return g.ToAPI(), nil
}
// ruleAPI generates apiRule object from alert by its ID(hash)
func (m *manager) ruleAPI(gID, rID uint64) (rule.ApiRule, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
group, ok := m.groups[gID]
if !ok {
return rule.ApiRule{}, fmt.Errorf("can't find group with id %d", gID)
}
g := group.ToAPI()
ruleID := strconv.FormatUint(rID, 10)
for _, r := range g.Rules {
if r.ID == ruleID {
return r, nil
}
}
return rule.ApiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
}
// alertAPI generates apiAlert object from alert by its ID(hash)
func (m *manager) alertAPI(gID, aID uint64) (*rule.ApiAlert, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
group, ok := m.groups[gID]
if !ok {
return nil, fmt.Errorf("can't find group with id %d", gID)
}
g := group.ToAPI()
for _, r := range g.Rules {
if r.Type != rule.TypeAlerting {
ar, ok := r.(*rule.AlertingRule)
if !ok {
continue
}
alertID := strconv.FormatUint(aID, 10)
for _, a := range r.Alerts {
if a.ID == alertID {
return a, nil
}
if apiAlert := alertToAPI(ar, aID); apiAlert != nil {
return apiAlert, nil
}
}
return nil, fmt.Errorf("can't find alert with id %d in group %q", aID, g.Name)
@@ -99,16 +82,17 @@ func (m *manager) close() {
}
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) error {
m.wg.Add(1)
id := g.GetID()
g.Init()
m.wg.Go(func() {
go func() {
defer m.wg.Done()
if restore {
g.Start(ctx, m.rw, m.rr)
g.Start(ctx, m.notifiers, m.rw, m.rr)
} else {
g.Start(ctx, m.rw, nil)
g.Start(ctx, m.notifiers, m.rw, nil)
}
})
}()
m.groups[id] = g
return nil
}
@@ -135,7 +119,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
if rrPresent && m.rw == nil {
return fmt.Errorf("config contains recording rules but `-remoteWrite.url` isn't set")
}
if arPresent && notifier.GetTargets() == nil {
if arPresent && m.notifiers == nil {
return fmt.Errorf("config contains alerting rules but neither `-notifier.url` nor `-notifier.config` nor `-notifier.blackhole` aren't set")
}
@@ -172,15 +156,15 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
if len(toUpdate) > 0 {
var wg sync.WaitGroup
for _, item := range toUpdate {
oldG := item.old
newG := item.new
wg.Go(func() {
// cancel evaluation so the Update will be applied as fast as possible.
// it is important to call InterruptEval before the update, because cancel fn
// can be re-assigned during the update.
oldG.InterruptEval()
oldG.UpdateWith(newG)
})
wg.Add(1)
// cancel evaluation so the Update will be applied as fast as possible.
// it is important to call InterruptEval before the update, because cancel fn
// can be re-assigned during the update.
item.old.InterruptEval()
go func(oldGroup *rule.Group, newGroup *rule.Group) {
oldGroup.UpdateWith(newGroup)
wg.Done()
}(item.old, item.new)
}
wg.Wait()
}

View File

@@ -40,11 +40,10 @@ func TestManagerEmptyRulesDir(t *testing.T) {
// execution of configuration update.
// Should be executed with -race flag
func TestManagerUpdateConcurrent(t *testing.T) {
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
m := &manager{
groups: make(map[uint64]*rule.Group),
querierBuilder: &datasource.FakeQuerier{},
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
}
paths := []string{
"config/testdata/dir/rules0-good.rules",
@@ -128,9 +127,8 @@ func TestManagerUpdate_Success(t *testing.T) {
m := &manager{
groups: make(map[uint64]*rule.Group),
querierBuilder: &datasource.FakeQuerier{},
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
}
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
cfgInit := loadCfg(t, []string{initPath}, true, true)
if err := m.update(ctx, cfgInit, false); err != nil {
@@ -279,8 +277,7 @@ func TestManagerUpdate_Failure(t *testing.T) {
rw: rw,
}
if notifiers != nil {
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
m.notifiers = func() []notifier.Notifier { return notifiers }
}
err := m.update(context.Background(), []config.Group{cfg}, false)
if err == nil {

View File

@@ -166,8 +166,8 @@ func templateAnnotations(annotations map[string]string, data AlertTplData, tmpl
ctmpl, _ := tmpl.Clone()
ctmpl = ctmpl.Option("missingkey=zero")
if err := templateAnnotation(&buf, builder.String(), tData, ctmpl, execute); err != nil {
r[key] = err.Error()
eg.Add(fmt.Errorf("(key: %q, value: %q): %w", key, text, err))
r[key] = text
eg.Add(fmt.Errorf("key %q, template %q: %w", key, text, err))
continue
}
r[key] = buf.String()
@@ -184,13 +184,13 @@ type tplData struct {
func templateAnnotation(dst io.Writer, text string, data tplData, tpl *textTpl.Template, execute bool) error {
tpl, err := tpl.Parse(text)
if err != nil {
return fmt.Errorf("error parsing template: %w", err)
return fmt.Errorf("error parsing annotation template: %w", err)
}
if !execute {
return nil
}
if err = tpl.Execute(dst, data); err != nil {
return fmt.Errorf("error evaluating template: %w", err)
return fmt.Errorf("error evaluating annotation template: %w", err)
}
return nil
}

View File

@@ -20,7 +20,7 @@ func TestAlertExecTemplate(t *testing.T) {
)
extLabels["cluster"] = extCluster
extLabels["dc"] = extDC
err := Init(extLabels, extURL)
_, err := Init(nil, extLabels, extURL)
checkErr(t, err)
f := func(alert *Alert, annotations map[string]string, tplExpected map[string]string) {

View File

@@ -3,7 +3,6 @@ package notifier
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"net/http"
@@ -23,11 +22,10 @@ import (
// AlertManager represents integration provider with Prometheus alert manager
// https://github.com/prometheus/alertmanager
type AlertManager struct {
addr *url.URL
argFunc AlertURLGenerator
client *http.Client
timeout time.Duration
lastError string
addr *url.URL
argFunc AlertURLGenerator
client *http.Client
timeout time.Duration
authCfg *promauth.Config
// stores already parsed RelabelConfigs object
@@ -73,42 +71,24 @@ func (am AlertManager) Addr() string {
return am.addr.Redacted()
}
func (am *AlertManager) LastError() string {
return am.lastError
}
// Send an alert or resolve message
func (am *AlertManager) Send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, headers map[string]string) error {
if len(alerts) != len(alertLabels) {
return fmt.Errorf("mismatched number of alerts and label sets after global alert relabeling")
}
func (am *AlertManager) Send(ctx context.Context, alerts []Alert, headers map[string]string) error {
am.metrics.alertsSent.Add(len(alerts))
startTime := time.Now()
err := am.send(ctx, alerts, alertLabels, headers)
err := am.send(ctx, alerts, headers)
am.metrics.alertsSendDuration.UpdateDuration(startTime)
if err != nil {
// the context can be cancelled on graceful shutdown
// or on group update. So no need to handle the error as usual.
if errors.Is(err, context.Canceled) {
return nil
}
am.metrics.alertsSendErrors.Add(len(alerts))
am.lastError = err.Error()
} else {
am.lastError = ""
}
return err
}
func (am *AlertManager) send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, headers map[string]string) error {
func (am *AlertManager) send(ctx context.Context, alerts []Alert, headers map[string]string) error {
b := &bytes.Buffer{}
alertsToSend := make([]Alert, 0, len(alerts))
lblss := make([][]prompb.Label, 0, len(alerts))
for i, a := range alerts {
lbls := alertLabels[i]
if am.relabelConfigs != nil {
lbls = am.relabelConfigs.Apply(lbls, 0)
}
for _, a := range alerts {
lbls := a.applyRelabelingIfNeeded(am.relabelConfigs)
if len(lbls) == 0 {
continue
}

View File

@@ -11,7 +11,6 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
)
@@ -146,11 +145,11 @@ func TestAlertManager_Send(t *testing.T) {
t.Fatalf("unexpected error: %s", err)
}
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, [][]prompb.Label{{{Name: "a", Value: "b"}}}, nil); err == nil {
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, nil); err == nil {
t.Fatalf("expected connection error got nil")
}
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, [][]prompb.Label{{{Name: "a", Value: "b"}}}, nil); err == nil {
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, nil); err == nil {
t.Fatalf("expected wrong http code error got nil")
}
@@ -161,7 +160,7 @@ func TestAlertManager_Send(t *testing.T) {
End: time.Now().UTC(),
Labels: map[string]string{"alertname": "alert0"},
Annotations: map[string]string{"a": "b", "c": "d"},
}}, [][]prompb.Label{{{Name: "alertname", Value: "alert0"}}}, map[string]string{headerKey: "bar"}); err != nil {
}}, map[string]string{headerKey: "bar"}); err != nil {
t.Fatalf("unexpected error %s", err)
}
@@ -175,7 +174,7 @@ func TestAlertManager_Send(t *testing.T) {
Name: "alert2",
Labels: map[string]string{"rule": "test", "tenant": "1"},
},
}, [][]prompb.Label{{{Name: "rule", Value: "test"}, {Name: "tenant", Value: "0"}}, {{Name: "rule", Value: "test"}, {Name: "tenant", Value: "1"}}}, map[string]string{headerKey: "bar"}); err != nil {
}, map[string]string{headerKey: "bar"}); err != nil {
t.Fatalf("unexpected error %s", err)
}
@@ -188,7 +187,7 @@ func TestAlertManager_Send(t *testing.T) {
Name: "alert2",
Labels: map[string]string{},
},
}, [][]prompb.Label{{{Name: "rule", Value: "test"}}, {{}}}, map[string]string{}); err != nil {
}, map[string]string{}); err != nil {
t.Fatalf("unexpected error %s", err)
}

View File

@@ -27,9 +27,15 @@ type Config struct {
// PathPrefix is added to URL path before adding alertManagerPath value
PathPrefix string `yaml:"path_prefix,omitempty"`
ConsulSDConfigs []ConsulSDConfigs `yaml:"consul_sd_configs,omitempty"`
DNSSDConfigs []DNSSDConfigs `yaml:"dns_sd_configs,omitempty"`
StaticConfigs []StaticConfig `yaml:"static_configs,omitempty"`
// ConsulSDConfigs contains list of settings for service discovery via Consul
// see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
ConsulSDConfigs []consul.SDConfig `yaml:"consul_sd_configs,omitempty"`
// DNSSDConfigs contains list of settings for service discovery via DNS.
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config
DNSSDConfigs []dns.SDConfig `yaml:"dns_sd_configs,omitempty"`
// StaticConfigs contains list of static targets
StaticConfigs []StaticConfig `yaml:"static_configs,omitempty"`
// HTTPClientConfig contains HTTP configuration for Notifier clients
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
@@ -56,29 +62,14 @@ type Config struct {
parsedAlertRelabelConfigs *promrelabel.ParsedConfigs
}
// staticConfig contains list of static targets in the following form:
// StaticConfig contains list of static targets in the following form:
//
// targets:
// [ - '<host>' ]
type StaticConfig struct {
Targets []string `yaml:"targets"`
// HTTPClientConfig contains HTTP configuration for the Targets
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
}
// ConsulSDConfigs contains list of settings for service discovery via Consul,
// see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
type ConsulSDConfigs struct {
consul.SDConfig `yaml:",inline"`
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
}
// DNSSDConfigs contains list of settings for service discovery via DNS,
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config
type DNSSDConfigs struct {
dns.SDConfig `yaml:",inline"`
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
@@ -104,31 +95,6 @@ func (cfg *Config) UnmarshalYAML(unmarshal func(any) error) error {
}
cfg.parsedAlertRelabelConfigs = arCfg
for _, s := range cfg.StaticConfigs {
if len(s.AlertRelabelConfigs) > 0 {
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
if err != nil {
return fmt.Errorf("failed to parse alert_relabel_configs in static_config: %w", err)
}
}
}
for _, s := range cfg.ConsulSDConfigs {
if len(s.AlertRelabelConfigs) > 0 {
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
if err != nil {
return fmt.Errorf("failed to parse alert_relabel_configs in consul_sd_config: %w", err)
}
}
}
for _, s := range cfg.DNSSDConfigs {
if len(s.AlertRelabelConfigs) > 0 {
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
if err != nil {
return fmt.Errorf("failed to parse alert_relabel_configs in dns_sd_config: %w", err)
}
}
}
b, err := yaml.Marshal(cfg)
if err != nil {
return fmt.Errorf("failed to marshal configuration for checksum: %w", err)

View File

@@ -35,6 +35,4 @@ func TestParseConfig_Failure(t *testing.T) {
f("testdata/unknownFields.bad.yaml", "unknown field")
f("non-existing-file", "error reading")
f("testdata/consul.bad.yaml", "failed to parse alert_relabel_configs in consul_sd_config")
f("testdata/dns.bad.yaml", "failed to parse alert relabeling config")
}

View File

@@ -8,7 +8,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
@@ -29,7 +28,11 @@ type configWatcher struct {
targets map[TargetType][]Target
}
func newWatcher(cfg *Config, gen AlertURLGenerator) (*configWatcher, error) {
func newWatcher(path string, gen AlertURLGenerator) (*configWatcher, error) {
cfg, err := parseConfig(path)
if err != nil {
return nil, err
}
cw := &configWatcher{
cfg: cfg,
wg: sync.WaitGroup{},
@@ -85,15 +88,18 @@ func (cw *configWatcher) reload(path string) error {
return cw.start()
}
func (cw *configWatcher) add(typeK TargetType, interval time.Duration, targetsFn getTargets) error {
targetMetadata, errors := getTargetMetadata(targetsFn, cw.cfg)
func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn getLabels) error {
targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
for _, err := range errors {
return fmt.Errorf("failed to init notifier for %q: %w", typeK, err)
}
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
cw.wg.Go(func() {
cw.wg.Add(1)
go func() {
defer cw.wg.Done()
ticker := time.NewTicker(interval)
defer ticker.Stop()
@@ -103,77 +109,62 @@ func (cw *configWatcher) add(typeK TargetType, interval time.Duration, targetsFn
return
case <-ticker.C:
}
targetMetadata, errors := getTargetMetadata(targetsFn, cw.cfg)
targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
for _, err := range errors {
logger.Errorf("failed to init notifier for %q: %w", typeK, err)
}
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
}
})
}()
return nil
}
type targetMetadata struct {
*promutil.Labels
alertRelabelConfigs *promrelabel.ParsedConfigs
}
func getTargetMetadata(targetsFn getTargets, cfg *Config) (map[string]targetMetadata, []error) {
metaLabelsList, alertRelabelCfgs, err := targetsFn()
func getTargetMetadata(labelsFn getLabels, cfg *Config) (map[string]*promutil.Labels, []error) {
metaLabels, err := labelsFn()
if err != nil {
return nil, []error{fmt.Errorf("failed to get labels: %w", err)}
}
targetMts := make(map[string]targetMetadata, len(metaLabelsList))
targetMetadata := make(map[string]*promutil.Labels, len(metaLabels))
var errors []error
duplicates := make(map[string]struct{})
for i := range metaLabelsList {
metaLabels := metaLabelsList[i]
alertRelabelCfg := alertRelabelCfgs[i]
for _, labels := range metaLabels {
target := labels.Get("__address__")
u, processedLabels, err := parseLabels(target, labels, cfg)
if err != nil {
errors = append(errors, err)
continue
}
if len(u) == 0 {
continue
}
// check for duplicated targets
// targets with same address but different alert_relabel_configs are still considered duplicates since it's mostly due to misconfiguration and could cause duplicated notifications.
if _, ok := duplicates[u]; ok {
if !*suppressDuplicateTargetErrors {
logger.Errorf("skipping duplicate target with identical address %q; "+
"make sure service discovery and relabeling is set up properly; "+
"original labels: %s; resulting labels: %s",
u, labels, processedLabels)
}
continue
}
duplicates[u] = struct{}{}
targetMts[u] = targetMetadata{
Labels: processedLabels,
alertRelabelConfigs: alertRelabelCfg,
}
for _, labels := range metaLabels {
target := labels.Get("__address__")
u, processedLabels, err := parseLabels(target, labels, cfg)
if err != nil {
errors = append(errors, err)
continue
}
if len(u) == 0 {
continue
}
if _, ok := duplicates[u]; ok { // check for duplicates
if !*suppressDuplicateTargetErrors {
logger.Errorf("skipping duplicate target with identical address %q; "+
"make sure service discovery and relabeling is set up properly; "+
"original labels: %s; resulting labels: %s",
u, labels, processedLabels)
}
continue
}
duplicates[u] = struct{}{}
targetMetadata[u] = processedLabels
}
return targetMts, errors
return targetMetadata, errors
}
type getTargets func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error)
type getLabels func() ([]*promutil.Labels, error)
func (cw *configWatcher) start() error {
if len(cw.cfg.StaticConfigs) > 0 {
var targets []Target
for i, cfg := range cw.cfg.StaticConfigs {
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.StaticConfigs[i].AlertRelabelConfigs)
for _, cfg := range cw.cfg.StaticConfigs {
httpCfg := mergeHTTPClientConfigs(cw.cfg.HTTPClientConfig, cfg.HTTPClientConfig)
for _, target := range cfg.Targets {
address, labels, err := parseLabels(target, nil, cw.cfg)
if err != nil {
return fmt.Errorf("failed to parse labels for target %q: %w", target, err)
}
notifier, err := NewAlertManager(address, cw.genFn, httpCfg, alertRelabelConfig, cw.cfg.Timeout.Duration())
notifier, err := NewAlertManager(address, cw.genFn, httpCfg, cw.cfg.parsedAlertRelabelConfigs, cw.cfg.Timeout.Duration())
if err != nil {
return fmt.Errorf("failed to init alertmanager for addr %q: %w", address, err)
}
@@ -187,20 +178,17 @@ func (cw *configWatcher) start() error {
}
if len(cw.cfg.ConsulSDConfigs) > 0 {
err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error) {
var labels [][]*promutil.Labels
var alertRelabelConfigs []*promrelabel.ParsedConfigs
err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([]*promutil.Labels, error) {
var labels []*promutil.Labels
for i := range cw.cfg.ConsulSDConfigs {
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.ConsulSDConfigs[i].AlertRelabelConfigs)
sdc := &cw.cfg.ConsulSDConfigs[i]
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
if err != nil {
return nil, nil, fmt.Errorf("got labels err: %w", err)
return nil, fmt.Errorf("got labels err: %w", err)
}
labels = append(labels, targetLabels)
alertRelabelConfigs = append(alertRelabelConfigs, alertRelabelConfig)
labels = append(labels, targetLabels...)
}
return labels, alertRelabelConfigs, nil
return labels, nil
})
if err != nil {
return fmt.Errorf("failed to start consulSD discovery: %w", err)
@@ -208,21 +196,17 @@ func (cw *configWatcher) start() error {
}
if len(cw.cfg.DNSSDConfigs) > 0 {
err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error) {
var labels [][]*promutil.Labels
var alertRelabelConfigs []*promrelabel.ParsedConfigs
err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([]*promutil.Labels, error) {
var labels []*promutil.Labels
for i := range cw.cfg.DNSSDConfigs {
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.DNSSDConfigs[i].AlertRelabelConfigs)
sdc := &cw.cfg.DNSSDConfigs[i]
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
if err != nil {
return nil, nil, fmt.Errorf("got labels err: %w", err)
return nil, fmt.Errorf("got labels err: %w", err)
}
labels = append(labels, targetLabels)
alertRelabelConfigs = append(alertRelabelConfigs, alertRelabelConfig)
labels = append(labels, targetLabels...)
}
return labels, alertRelabelConfigs, nil
return labels, nil
})
if err != nil {
return fmt.Errorf("failed to start DNSSD discovery: %w", err)
@@ -256,30 +240,30 @@ func (cw *configWatcher) setTargets(key TargetType, targets []Target) {
cw.targetsMu.Unlock()
}
func (cw *configWatcher) updateTargets(key TargetType, targetMts map[string]targetMetadata, cfg *Config, genFn AlertURLGenerator) {
func (cw *configWatcher) updateTargets(key TargetType, targetMetadata map[string]*promutil.Labels, cfg *Config, genFn AlertURLGenerator) {
cw.targetsMu.Lock()
defer cw.targetsMu.Unlock()
oldTargets := cw.targets[key]
var updatedTargets []Target
for _, ot := range oldTargets {
if _, ok := targetMts[ot.Addr()]; !ok {
if _, ok := targetMetadata[ot.Addr()]; !ok {
// if target not exists in currentTargets, close it
ot.Close()
} else {
updatedTargets = append(updatedTargets, ot)
delete(targetMts, ot.Addr())
delete(targetMetadata, ot.Addr())
}
}
// create new resources for the new targets
for addr, metadata := range targetMts {
am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, metadata.alertRelabelConfigs, cfg.Timeout.Duration())
for addr, labels := range targetMetadata {
am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, cfg.parsedAlertRelabelConfigs, cfg.Timeout.Duration())
if err != nil {
logger.Errorf("failed to init %s notifier with addr %q: %w", key, addr, err)
continue
}
updatedTargets = append(updatedTargets, Target{
Notifier: am,
Labels: metadata.Labels,
Labels: labels,
})
}

View File

@@ -7,7 +7,6 @@ import (
"net/http/httptest"
"os"
"sync"
"sync/atomic"
"testing"
"time"
@@ -29,11 +28,7 @@ static_configs:
- localhost:9093
- localhost:9094
`)
cfg, err := parseConfig(f.Name())
if err != nil {
t.Fatalf("failed to parse config: %s", err)
}
cw, err := newWatcher(cfg, nil)
cw, err := newWatcher(f.Name(), nil)
if err != nil {
t.Fatalf("failed to start config watcher: %s", err)
}
@@ -88,64 +83,33 @@ consul_sd_configs:
- server: %s
services:
- alertmanager
- server: %s
services:
- alertmanager
alert_relabel_configs:
- target_label: "foo"
replacement: "tar"
`, consulSDServer.URL, consulSDServer.URL))
`, consulSDServer.URL))
cfg, err := parseConfig(consulSDFile.Name())
if err != nil {
t.Fatalf("failed to parse config: %s", err)
}
cw, err := newWatcher(cfg, nil)
cw, err := newWatcher(consulSDFile.Name(), nil)
if err != nil {
t.Fatalf("failed to start config watcher: %s", err)
}
defer cw.mustStop()
if len(cw.notifiers()) != 3 {
t.Fatalf("expected to get 3 notifiers; got %d", len(cw.notifiers()))
if len(cw.notifiers()) != 2 {
t.Fatalf("expected to get 2 notifiers; got %d", len(cw.notifiers()))
}
expAddr1 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService1)
expAddr2 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService2)
expAddr3 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService3)
n1, n2, n3 := cw.notifiers()[0], cw.notifiers()[1], cw.notifiers()[2]
n1, n2 := cw.notifiers()[0], cw.notifiers()[1]
if n1.Addr() != expAddr1 {
t.Fatalf("exp address %q; got %q", expAddr1, n1.Addr())
}
if n2.Addr() != expAddr2 {
t.Fatalf("exp address %q; got %q", expAddr2, n2.Addr())
}
if n3.Addr() != expAddr3 {
t.Fatalf("exp address %q; got %q", expAddr3, n3.Addr())
}
if n1.(*AlertManager).relabelConfigs.String() != "" {
t.Fatalf("unexpected relabel configs: %q", n1.(*AlertManager).relabelConfigs.String())
}
if n2.(*AlertManager).relabelConfigs.String() != "" {
t.Fatalf("unexpected relabel configs: %q", n2.(*AlertManager).relabelConfigs.String())
}
if n3.(*AlertManager).relabelConfigs.String() != "- target_label: foo\n replacement: tar\n" {
t.Fatalf("unexpected relabel configs: %q", n3.(*AlertManager).relabelConfigs.String())
}
f := func() bool { return len(cw.notifiers()) == 1 }
if !waitFor(f, time.Second) {
t.Fatalf("expected to get 1 notifiers; got %d", len(cw.notifiers()))
}
n3 = cw.notifiers()[0]
if n3.Addr() != expAddr3 {
t.Fatalf("exp address %q; got %q", expAddr3, n3.Addr())
}
if n3.(*AlertManager).relabelConfigs.String() != "- target_label: foo\n replacement: tar\n" {
t.Fatalf("unexpected relabel configs: %q", n3.(*AlertManager).relabelConfigs.String())
}
}
// TestConfigWatcherReloadConcurrent supposed to test concurrent
@@ -200,11 +164,7 @@ consul_sd_configs:
"unknownFields.bad.yaml",
}
cfg, err := parseConfig(paths[0])
if err != nil {
t.Fatalf("failed to parse config: %s", err)
}
cw, err := newWatcher(cfg, nil)
cw, err := newWatcher(paths[0], nil)
if err != nil {
t.Fatalf("failed to start config watcher: %s", err)
}
@@ -242,11 +202,10 @@ func checkErr(t *testing.T, err error) {
const (
fakeConsulService1 = "127.0.0.1:9093"
fakeConsulService2 = "127.0.0.1:9095"
fakeConsulService3 = "127.0.0.1:9097"
)
func newFakeConsulServer() *httptest.Server {
var requestCount atomic.Int32
requestCount := 0
mux := http.NewServeMux()
mux.HandleFunc("/v1/agent/self", func(rw http.ResponseWriter, _ *http.Request) {
rw.Write([]byte(`{"Config": {"Datacenter": "dc1"}}`))
@@ -261,7 +220,7 @@ func newFakeConsulServer() *httptest.Server {
}`))
})
mux.HandleFunc("/v1/health/service/alertmanager", func(rw http.ResponseWriter, _ *http.Request) {
if requestCount.Load() == 0 {
if requestCount == 0 {
rw.Header().Set("X-Consul-Index", "1")
rw.Write([]byte(`
[
@@ -401,7 +360,7 @@ func newFakeConsulServer() *httptest.Server {
}
]`))
}
requestCount.Add(1)
requestCount++
})
return httptest.NewServer(mux)

View File

@@ -5,8 +5,6 @@ import (
"fmt"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
)
// FakeNotifier is a mock notifier
@@ -17,32 +15,14 @@ type FakeNotifier struct {
counter int
}
// InitFakeNotifier initializes global notifier to FakeNotifier,
// and returns a cleanup function to restore the original getActiveNotifiers.
func InitFakeNotifier() (*FakeNotifier, func()) {
originalGetActiveNotifiers := getActiveNotifiers
fn := &FakeNotifier{}
getActiveNotifiers = func() []Notifier {
return []Notifier{fn}
}
return fn, func() {
getActiveNotifiers = originalGetActiveNotifiers
}
}
// Close does nothing
func (*FakeNotifier) Close() {}
// LastError returns last error message
func (*FakeNotifier) LastError() string {
return ""
}
// Addr returns ""
func (*FakeNotifier) Addr() string { return "" }
// Send sets alerts and increases counter
func (fn *FakeNotifier) Send(_ context.Context, alerts []Alert, _ [][]prompb.Label, _ map[string]string) error {
func (fn *FakeNotifier) Send(_ context.Context, alerts []Alert, _ map[string]string) error {
fn.Lock()
defer fn.Unlock()
fn.counter += len(alerts)

View File

@@ -1,22 +1,14 @@
package notifier
import (
"context"
"flag"
"fmt"
"net/url"
"strconv"
"strings"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
)
@@ -65,61 +57,11 @@ var (
sendTimeout = flagutil.NewArrayDuration("notifier.sendTimeout", 10*time.Second, "Timeout when sending alerts to the corresponding -notifier.url")
)
// AlertURLGeneratorFn returns a URL to the passed alert object.
// Call InitAlertURLGeneratorFn before using this function.
var AlertURLGeneratorFn AlertURLGenerator
// InitAlertURLGeneratorFn populates AlertURLGeneratorFn
func InitAlertURLGeneratorFn(externalURL *url.URL, externalAlertSource string, validateTemplate bool) error {
if externalAlertSource == "" {
AlertURLGeneratorFn = func(a Alert) string {
gID, aID := strconv.FormatUint(a.GroupID, 10), strconv.FormatUint(a.ID, 10)
return fmt.Sprintf("%s/vmalert/alert?%s=%s&%s=%s", externalURL, "group_id", gID, "alert_id", aID)
}
return nil
}
if validateTemplate {
if err := ValidateTemplates(map[string]string{
"tpl": externalAlertSource,
}); err != nil {
return fmt.Errorf("error validating source template %s: %w", externalAlertSource, err)
}
}
m := map[string]string{
"tpl": externalAlertSource,
}
AlertURLGeneratorFn = func(alert Alert) string {
qFn := func(_ string) ([]datasource.Metric, error) {
return nil, fmt.Errorf("`query` template isn't supported for alert source template")
}
templated, err := alert.ExecTemplate(qFn, alert.Labels, m)
if err != nil {
logger.Errorf("cannot template alert source: %s", err)
}
return fmt.Sprintf("%s/%s", externalURL, templated["tpl"])
}
return nil
}
var (
// getActiveNotifiers returns the current list of Notifier objects.
getActiveNotifiers func() []Notifier
// globalRelabelCfg stores the parsed alert relabeling config from the config file if there is
globalRelabelCfg *promrelabel.ParsedConfigs
// cw holds a configWatcher for configPath configuration file
// configWatcher provides a list of Notifier objects discovered
// from static config or via service discovery.
// cw is not nil only if configPath is provided.
cw *configWatcher
// externalLabels is a global variable for holding external labels configured via flags
// It is supposed to be inited via Init function only.
externalLabels map[string]string
// externalURL is a global variable for holding external URL value configured via flag
// It is supposed to be inited via Init function only.
externalURL string
)
// cw holds a configWatcher for configPath configuration file
// configWatcher provides a list of Notifier objects discovered
// from static config or via service discovery.
// cw is not nil only if configPath is provided.
var cw *configWatcher
// Reload checks the changes in configPath configuration file
// and applies changes if any.
@@ -130,62 +72,66 @@ func Reload() error {
return cw.reload(*configPath)
}
var staticNotifiersFn func() []Notifier
var (
// externalLabels is a global variable for holding external labels configured via flags
// It is supposed to be inited via Init function only.
externalLabels map[string]string
// externalURL is a global variable for holding external URL value configured via flag
// It is supposed to be inited via Init function only.
externalURL string
)
// Init returns a function for retrieving actual list of Notifier objects.
// Init works in two mods:
// - configuration via flags (for backward compatibility). Is always static
// and don't support live reloads.
// - configuration via file. Supports live reloads and service discovery.
//
// Init returns an error if both mods are used.
func Init(extLabels map[string]string, extURL string) error {
func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (func() []Notifier, error) {
externalURL = extURL
externalLabels = extLabels
_, err := url.Parse(externalURL)
if err != nil {
return fmt.Errorf("failed to parse external URL: %w", err)
return nil, fmt.Errorf("failed to parse external URL: %w", err)
}
if *blackHole {
if len(*addrs) > 0 || *configPath != "" {
return fmt.Errorf("only one of -notifier.blackhole, -notifier.url and -notifier.config flags must be specified")
return nil, fmt.Errorf("only one of -notifier.blackhole, -notifier.url and -notifier.config flags must be specified")
}
notifier := newBlackHoleNotifier()
getActiveNotifiers = func() []Notifier {
staticNotifiersFn = func() []Notifier {
return []Notifier{notifier}
}
return nil
return staticNotifiersFn, nil
}
if *configPath == "" && len(*addrs) == 0 {
return nil
return nil, nil
}
if *configPath != "" && len(*addrs) > 0 {
return fmt.Errorf("only one of -notifier.config or -notifier.url flags must be specified")
return nil, fmt.Errorf("only one of -notifier.config or -notifier.url flags must be specified")
}
if len(*addrs) > 0 {
notifiers, err := notifiersFromFlags(AlertURLGeneratorFn)
notifiers, err := notifiersFromFlags(gen)
if err != nil {
return fmt.Errorf("failed to create notifier from flag values: %w", err)
return nil, fmt.Errorf("failed to create notifier from flag values: %w", err)
}
getActiveNotifiers = func() []Notifier {
staticNotifiersFn = func() []Notifier {
return notifiers
}
return nil
return staticNotifiersFn, nil
}
cfg, err := parseConfig(*configPath)
cw, err = newWatcher(*configPath, gen)
if err != nil {
return err
return nil, fmt.Errorf("failed to init config watcher: %w", err)
}
if cfg.AlertRelabelConfigs != nil {
globalRelabelCfg = cfg.parsedAlertRelabelConfigs
}
cw, err = newWatcher(cfg, AlertURLGeneratorFn)
if err != nil {
return fmt.Errorf("failed to init config watcher: %w", err)
}
getActiveNotifiers = cw.notifiers
return nil
return cw.notifiers, nil
}
// InitSecretFlags must be called after flag.Parse and before any logging
@@ -260,57 +206,23 @@ const (
// GetTargets returns list of static or discovered targets
// via notifier configuration.
//
// Must be called after Init.
func GetTargets() map[TargetType][]Target {
if getActiveNotifiers == nil {
return nil
}
var targets = make(map[TargetType][]Target)
// use cached targets from configWatcher instead of getActiveNotifiers for the extra target labels
if staticNotifiersFn != nil {
for _, ns := range staticNotifiersFn() {
targets[TargetStatic] = append(targets[TargetStatic], Target{
Notifier: ns,
})
}
}
if cw != nil {
cw.targetsMu.RLock()
for key, ns := range cw.targets {
targets[key] = append(targets[key], ns...)
}
cw.targetsMu.RUnlock()
return targets
}
// static notifiers don't have labels
for _, ns := range getActiveNotifiers() {
targets[TargetStatic] = append(targets[TargetStatic], Target{
Notifier: ns,
})
}
return targets
}
// Send sends alerts to all active notifiers
func Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) *vmalertutil.ErrGroup {
alertsToSend := make([]Alert, 0, len(alerts))
lblss := make([][]prompb.Label, 0, len(alerts))
// apply global relabel config first without modifying original alerts in alerts
for _, a := range alerts {
lbls := a.applyRelabelingIfNeeded(globalRelabelCfg)
if len(lbls) == 0 {
continue
}
alertsToSend = append(alertsToSend, a)
lblss = append(lblss, lbls)
}
errGr := new(vmalertutil.ErrGroup)
wg := sync.WaitGroup{}
activeNotifiers := getActiveNotifiers()
for i := range activeNotifiers {
nt := activeNotifiers[i]
wg.Go(func() {
if err := nt.Send(ctx, alertsToSend, lblss, notifierHeaders); err != nil {
errGr.Add(fmt.Errorf("failed to send alerts to addr %q: %w", nt.Addr(), err))
}
})
}
wg.Wait()
return errGr
}

View File

@@ -1,17 +1,9 @@
package notifier
import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"net/url"
"os"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
)
func TestInit(t *testing.T) {
@@ -20,13 +12,14 @@ func TestInit(t *testing.T) {
*addrs = flagutil.ArrayString{"127.0.0.1", "127.0.0.2"}
err := Init(nil, "")
fn, err := Init(nil, nil, "")
if err != nil {
t.Fatalf("%s", err)
}
if len(getActiveNotifiers()) != 2 {
t.Fatalf("expected to get 2 notifiers; got %d", len(getActiveNotifiers()))
nfs := fn()
if len(nfs) != 2 {
t.Fatalf("expected to get 2 notifiers; got %d", len(nfs))
}
targets := GetTargets()
@@ -59,7 +52,7 @@ func TestInitNegative(t *testing.T) {
*configPath = path
*addrs = flagutil.ArrayString{addr}
*blackHole = bh
if err := Init(nil, ""); err == nil {
if _, err := Init(nil, nil, ""); err == nil {
t.Fatalf("expected to get error; got nil instead")
}
}
@@ -76,13 +69,14 @@ func TestBlackHole(t *testing.T) {
*blackHole = true
err := Init(nil, "")
fn, err := Init(nil, nil, "")
if err != nil {
t.Fatalf("%s", err)
}
if len(getActiveNotifiers()) != 1 {
t.Fatalf("expected to get 1 notifier; got %d", len(getActiveNotifiers()))
nfs := fn()
if len(nfs) != 1 {
t.Fatalf("expected to get 1 notifier; got %d", len(nfs))
}
targets := GetTargets()
@@ -97,112 +91,3 @@ func TestBlackHole(t *testing.T) {
t.Fatalf("expected to get \"blackhole\"; got %q instead", nf1.Addr())
}
}
func TestGetAlertURLGenerator(t *testing.T) {
oldAlertURLGeneratorFn := AlertURLGeneratorFn
defer func() { AlertURLGeneratorFn = oldAlertURLGeneratorFn }()
testAlert := Alert{GroupID: 42, ID: 2, Value: 4, Labels: map[string]string{"tenant": "baz"}}
u, _ := url.Parse("https://victoriametrics.com/path")
err := InitAlertURLGeneratorFn(u, "", false)
if err != nil {
t.Fatalf("unexpected error %s", err)
}
exp := fmt.Sprintf("https://victoriametrics.com/path/vmalert/alert?%s=42&%s=2", "group_id", "alert_id")
if exp != AlertURLGeneratorFn(testAlert) {
t.Fatalf("unexpected url want %s, got %s", exp, AlertURLGeneratorFn(testAlert))
}
err = InitAlertURLGeneratorFn(nil, "foo?{{invalid}}", true)
if err == nil {
t.Fatalf("expected template validation error got nil")
}
err = InitAlertURLGeneratorFn(u, "foo?query={{$value}}&ds={{ $labels.tenant }}", true)
if err != nil {
t.Fatalf("unexpected error %s", err)
}
if exp := "https://victoriametrics.com/path/foo?query=4&ds=baz"; exp != AlertURLGeneratorFn(testAlert) {
t.Fatalf("unexpected url want %s, got %s", exp, AlertURLGeneratorFn(testAlert))
}
}
func TestSendAlerts(t *testing.T) {
oldAlertURLGeneratorFn := AlertURLGeneratorFn
defer func() { AlertURLGeneratorFn = oldAlertURLGeneratorFn }()
AlertURLGeneratorFn = func(alert Alert) string {
return ""
}
mux := http.NewServeMux()
mux.HandleFunc("/", func(_ http.ResponseWriter, _ *http.Request) {
t.Fatalf("should not be called")
})
mux.HandleFunc(alertManagerPath, func(w http.ResponseWriter, r *http.Request) {
var a []struct {
Labels map[string]string `json:"labels"`
}
if err := json.NewDecoder(r.Body).Decode(&a); err != nil {
t.Fatalf("can not unmarshal data into alert %s", err)
}
if len(a) != 2 {
t.Fatalf("expected 2 alert in array got %d", len(a))
}
if len(a[0].Labels) != 4 {
t.Fatalf("expected 4 labels got %d", len(a[0].Labels))
}
if a[0].Labels["env"] != "prod" {
t.Fatalf("expected env label to be prod during relabeling, got %s", a[0].Labels["env"])
}
if a[0].Labels["c"] != "baz" {
t.Fatalf("expected c label to be baz during relabeling, got %s", a[0].Labels["c"])
}
if len(a[1].Labels) != 1 {
t.Fatalf("expected 1 labels got %d", len(a[1].Labels))
}
})
srv := httptest.NewServer(mux)
defer srv.Close()
f, err := os.CreateTemp("", "")
if err != nil {
t.Fatal(err)
}
defer fs.MustRemovePath(f.Name())
rawConfig := `
static_configs:
- targets:
- %s
alert_relabel_configs:
- source_labels: [b]
target_label: "c"
alert_relabel_configs:
- source_labels: [a]
target_label: "b"
- target_label: "env"
replacement: "prod"
`
config := fmt.Sprintf(rawConfig, srv.URL+alertManagerPath)
writeToFile(f.Name(), config)
oldConfigPath := configPath
defer func() { configPath = oldConfigPath }()
*configPath = f.Name()
err = Init(nil, "")
if err != nil {
t.Fatalf("unexpected error when parse notifier config: %s", err)
}
firingAlerts := []Alert{
{
Name: "alert1",
Labels: map[string]string{"a": "baz"},
},
{
Name: "alert2",
Labels: map[string]string{},
},
}
errG := Send(context.Background(), firingAlerts, nil)
if errG.Err() != nil {
t.Fatalf("unexpected error when sending alerts: %s", err)
}
}

View File

@@ -1,21 +1,15 @@
package notifier
import (
"context"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
)
import "context"
// Notifier is a common interface for alert manager provider
type Notifier interface {
// Send sends the given list of alerts.
// Returns an error if fails to send the alerts.
// Must unblock if the given ctx is cancelled.
Send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, notifierHeaders map[string]string) error
Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) error
// Addr returns address where alerts are sent.
Addr() string
// LastError returns error, that occured during last attempt to send data
LastError() string
// Close is a destructor for the Notifier
Close()
}

View File

@@ -1,10 +1,6 @@
package notifier
import (
"context"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
)
import "context"
// blackHoleNotifier is a Notifier stub, used when no notifications need
// to be sent.
@@ -14,7 +10,7 @@ type blackHoleNotifier struct {
}
// Send will send no notifications, but increase the metric.
func (bh *blackHoleNotifier) Send(_ context.Context, alerts []Alert, _ [][]prompb.Label, _ map[string]string) error { //nolint:revive
func (bh *blackHoleNotifier) Send(_ context.Context, alerts []Alert, _ map[string]string) error { //nolint:revive
bh.metrics.alertsSent.Add(len(alerts))
return nil
}
@@ -29,11 +25,6 @@ func (bh *blackHoleNotifier) Close() {
bh.metrics.close()
}
// LastError return last notifier's error
func (bh *blackHoleNotifier) LastError() string {
return ""
}
// newBlackHoleNotifier creates a new blackHoleNotifier
func newBlackHoleNotifier() *blackHoleNotifier {
address := "blackhole"

View File

@@ -5,7 +5,6 @@ import (
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
metricset "github.com/VictoriaMetrics/metrics"
)
@@ -17,7 +16,7 @@ func TestBlackHoleNotifier_Send(t *testing.T) {
Start: time.Now().UTC(),
End: time.Now().UTC(),
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
}}, [][]prompb.Label{{}}, nil); err != nil {
}}, nil); err != nil {
t.Fatalf("unexpected error %s", err)
}
@@ -35,7 +34,7 @@ func TestBlackHoleNotifier_Close(t *testing.T) {
Start: time.Now().UTC(),
End: time.Now().UTC(),
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
}}, [][]prompb.Label{{}}, nil); err != nil {
}}, nil); err != nil {
t.Fatalf("unexpected error %s", err)
}

View File

@@ -1,19 +0,0 @@
consul_sd_configs:
- server: localhost:8500
scheme: http
services:
- alertmanager
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "prod"
- server: localhost:8500
services:
- consul
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "(abc"
alert_relabel_configs:
- target_label: "foo"
replacement: "aaa"

View File

@@ -1,13 +0,0 @@
dns_sd_configs:
- names:
- cloudflare.com
type: 'A'
port: 9093
relabel_configs:
- source_labels: [__meta_dns_name]
replacement: '${1}'
target_label: dns_name
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "(abc"

View File

@@ -2,19 +2,12 @@ static_configs:
- targets:
- localhost:9093
- localhost:9095
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "static"
consul_sd_configs:
- server: localhost:8500
scheme: http
services:
- alertmanager
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "consul"
- server: localhost:8500
services:
- consul
@@ -24,10 +17,6 @@ dns_sd_configs:
- cloudflare.com
type: 'A'
port: 9093
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "dns"
relabel_configs:
- source_labels: [__meta_consul_tags]
@@ -36,4 +25,4 @@ relabel_configs:
target_label: __scheme__
- source_labels: [__meta_dns_name]
replacement: '${1}'
target_label: dns_name
target_label: dns_name

View File

@@ -1,14 +1,22 @@
headers:
- 'CustomHeader: foo'
static_configs:
- targets:
- http://192.168.0.101:9093
alert_relabel_configs:
- target_label: "foo"
replacement: "aaa"
- localhost:9093
- localhost:9095
- https://localhost:9093/test/api/v2/alerts
basic_auth:
username: foo
password: bar
- targets:
- http://192.168.0.101:9093
alert_relabel_configs:
- target_label: "foo"
replacement: "ccc"
- localhost:9096
- localhost:9097
basic_auth:
username: foo
password: baz
alert_relabel_configs:
- target_label: "foo"
replacement: "aaa"

View File

@@ -1,19 +0,0 @@
package notifier
// ApiNotifier represents a Notifier configuration for WEB view
type ApiNotifier struct {
// Kind is a Notifier type
Kind TargetType `json:"kind"`
// Targets is a list of Notifier targets
Targets []*ApiTarget `json:"targets"`
}
// ApiTarget represents a specific Notifier target for WEB view
type ApiTarget struct {
// Address is a URL for sending notifications
Address string `json:"address"`
// Labels is a list of labels to add to each sent notification
Labels map[string]string `json:"labels"`
// LastError contains the error faced while sending to notifier.
LastError string `json:"lastError"`
}

View File

@@ -14,9 +14,9 @@ import (
)
var (
addr = flag.String("remoteRead.url", "", "Optional URL to datasource compatible with MetricsQL. It can be single node VictoriaMetrics or vmselect. "+
"Remote read is used to restore alerts state. "+
"This configuration makes sense only if vmalert was configured with '-remoteWrite.url' before and has been successfully persisted its state. "+
addr = flag.String("remoteRead.url", "", "Optional URL to datasource compatible with MetricsQL. It can be single node VictoriaMetrics or vmselect."+
"Remote read is used to restore alerts state."+
"This configuration makes sense only if `vmalert` was configured with `remoteWrite.url` before and has been successfully persisted its state. "+
"Supports address in the form of IP address with a port (e.g., http://127.0.0.1:8428) or DNS SRV record. "+
"See also '-remoteRead.disablePathAppend', '-remoteRead.showURL'.")

View File

@@ -173,8 +173,9 @@ func (c *Client) run(ctx context.Context) {
cancel()
}
c.wg.Go(func() {
c.wg.Add(1)
go func() {
defer c.wg.Done()
defer ticker.Stop()
for {
select {
@@ -196,7 +197,7 @@ func (c *Client) run(ctx context.Context) {
}
}
}
})
}()
}
var (

View File

@@ -2,7 +2,6 @@ package rule
import (
"context"
"errors"
"fmt"
"hash/fnv"
"math"
@@ -188,54 +187,6 @@ func (ar *AlertingRule) ID() uint64 {
return ar.RuleID
}
// ToAPI returns ApiRule representation of ar
func (ar *AlertingRule) ToAPI() ApiRule {
state := ar.state
lastState := state.getLast()
r := ApiRule{
Type: TypeAlerting,
DatasourceType: ar.Type.String(),
Name: ar.Name,
Query: ar.Expr,
Duration: ar.For.Seconds(),
KeepFiringFor: ar.KeepFiringFor.Seconds(),
Labels: ar.Labels,
Annotations: ar.Annotations,
LastEvaluation: lastState.Time,
EvaluationTime: lastState.Duration.Seconds(),
Health: "ok",
State: "inactive",
Alerts: ar.AlertsToAPI(),
LastSamples: lastState.Samples,
LastSeriesFetched: lastState.SeriesFetched,
MaxUpdates: state.size(),
Updates: state.getAll(),
Debug: ar.Debug,
// encode as strings to avoid rounding in JSON
ID: fmt.Sprintf("%d", ar.ID()),
GroupID: fmt.Sprintf("%d", ar.GroupID),
GroupName: ar.GroupName,
File: ar.File,
}
if lastState.Err != nil {
r.LastError = lastState.Err.Error()
r.Health = "err"
}
// satisfy apiRule.State logic
if len(r.Alerts) > 0 {
r.State = notifier.StatePending.String()
stateFiring := notifier.StateFiring.String()
for _, a := range r.Alerts {
if a.State == stateFiring {
r.State = stateFiring
break
}
}
}
return r
}
// GetAlerts returns active alerts of rule
func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
ar.alertsMu.RLock()
@@ -247,6 +198,16 @@ func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
return alerts
}
// GetAlert returns alert if id exists
func (ar *AlertingRule) GetAlert(id uint64) *notifier.Alert {
ar.alertsMu.RLock()
defer ar.alertsMu.RUnlock()
if ar.alerts == nil {
return nil
}
return ar.alerts[id]
}
func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string, args ...any) {
if !ar.Debug {
return
@@ -312,11 +273,6 @@ type labelSet struct {
// On k conflicts in origin set, the original value is preferred and copied
// to processed with `exported_%k` key. The copy happens only if passed v isn't equal to origin[k] value.
func (ls *labelSet) add(k, v string) {
// do not add label with empty value, since it has no meaning.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984
if v == "" {
return
}
ls.processed[k] = v
ov, ok := ls.origin[k]
if !ok {
@@ -351,6 +307,9 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
Value: m.Values[0],
Expr: ar.Expr,
})
if err != nil {
return nil, fmt.Errorf("failed to expand labels: %w", err)
}
for k, v := range extraLabels {
ls.add(k, v)
}
@@ -361,7 +320,7 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
if !*disableAlertGroupLabel && ar.GroupName != "" {
ls.add(alertGroupNameLabel, ar.GroupName)
}
return ls, err
return ls, nil
}
// execRange executes alerting rule on the given time range similarly to exec.
@@ -382,7 +341,7 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
return []datasource.Metric{{Timestamps: []int64{0}, Values: []float64{math.NaN()}}}, nil
}
for _, s := range res.Data {
ls, err := ar.expandLabelTemplates(s, qFn)
ls, err := ar.expandLabelTemplates(s)
if err != nil {
return nil, err
}
@@ -454,7 +413,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
defer func() {
ar.state.add(curState)
if curState.Err != nil && !errors.Is(curState.Err, context.Canceled) {
if curState.Err != nil {
ar.metrics.errors.Inc()
}
}()
@@ -475,11 +434,10 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
expandedLabels := make([]*labelSet, len(res.Data))
expandedAnnotations := make([]map[string]string, len(res.Data))
for i, m := range res.Data {
ls, err := ar.expandLabelTemplates(m, qFn)
ls, err := ar.expandLabelTemplates(m)
if err != nil {
// only set error in current state, but do not break alert processing
curState.Err = err
logger.Errorf("got templating error in rule %s: %q", ar.Name, err)
return nil, curState.Err
}
at := ts
alertID := hash(ls.processed)
@@ -491,9 +449,8 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
}
as, err := ar.expandAnnotationTemplates(m, qFn, at, ls)
if err != nil {
// only set error in current state, but do not break alert processing
curState.Err = err
logger.Errorf("got templating error in rule %s: %q", ar.Name, err)
return nil, curState.Err
}
expandedLabels[i] = ls
expandedAnnotations[i] = as
@@ -599,10 +556,13 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
return append(tss, ar.toTimeSeries(ts.Unix())...), nil
}
func (ar *AlertingRule) expandLabelTemplates(m datasource.Metric, qFn templates.QueryFn) (*labelSet, error) {
func (ar *AlertingRule) expandLabelTemplates(m datasource.Metric) (*labelSet, error) {
qFn := func(_ string) ([]datasource.Metric, error) {
return nil, fmt.Errorf("`query` template isn't supported in rule label")
}
ls, err := ar.toLabels(m, qFn)
if err != nil {
return ls, fmt.Errorf("failed to expand label templates: %s", err)
return nil, fmt.Errorf("failed to expand label templates: %s", err)
}
return ls, nil
}
@@ -620,7 +580,7 @@ func (ar *AlertingRule) expandAnnotationTemplates(m datasource.Metric, qFn templ
}
as, err := notifier.ExecTemplate(qFn, ar.Annotations, tplData)
if err != nil {
return as, fmt.Errorf("failed to expand annotation templates: %s", err)
return nil, fmt.Errorf("failed to expand annotation templates: %s", err)
}
return as, nil
}

View File

@@ -10,7 +10,6 @@ import (
"strings"
"sync"
"testing"
"testing/synctest"
"time"
"github.com/VictoriaMetrics/metrics"
@@ -827,9 +826,12 @@ func TestGroup_Restore(t *testing.T) {
fg := NewGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
fg.Init()
wg := sync.WaitGroup{}
wg.Go(func() {
fg.Start(context.Background(), nil, fqr)
})
wg.Add(1)
go func() {
nts := func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} }
fg.Start(context.Background(), nts, nil, fqr)
wg.Done()
}()
fg.Close()
wg.Wait()
@@ -1370,10 +1372,8 @@ func TestAlertingRule_ToLabels(t *testing.T) {
ar := &AlertingRule{
Labels: map[string]string{
"instance": "override", // this should override instance with new value
"group": "vmalert", // this shouldn't have effect since value in metric is equal
"invalid_label": "{{ .Values.mustRuntimeFail }}",
"empty_label": "", // this should be dropped
"instance": "override", // this should override instance with new value
"group": "vmalert", // this shouldn't have effect since value in metric is equal
},
Expr: "sum(vmalert_alerting_rules_error) by(instance, group, alertname) > 0",
Name: "AlertingRulesError",
@@ -1381,11 +1381,10 @@ func TestAlertingRule_ToLabels(t *testing.T) {
}
expectedOriginLabels := map[string]string{
"instance": "0.0.0.0:8800",
"group": "vmalert",
"alertname": "ConfigurationReloadFailure",
"alertgroup": "vmalert",
"invalid_label": `error evaluating template: template: :1:268: executing "" at <.Values.mustRuntimeFail>: can't evaluate field Values in type notifier.tplData`,
"instance": "0.0.0.0:8800",
"group": "vmalert",
"alertname": "ConfigurationReloadFailure",
"alertgroup": "vmalert",
}
expectedProcessedLabels := map[string]string{
@@ -1395,12 +1394,11 @@ func TestAlertingRule_ToLabels(t *testing.T) {
"exported_alertname": "ConfigurationReloadFailure",
"group": "vmalert",
"alertgroup": "vmalert",
"invalid_label": `error evaluating template: template: :1:268: executing "" at <.Values.mustRuntimeFail>: can't evaluate field Values in type notifier.tplData`,
}
ls, err := ar.toLabels(metric, nil)
if err == nil || !strings.Contains(err.Error(), "error evaluating template") {
t.Fatalf("unexpected error %q", err.Error())
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if !reflect.DeepEqual(ls.origin, expectedOriginLabels) {
@@ -1431,142 +1429,3 @@ func TestAlertingRuleExec_Partial(t *testing.T) {
t.Fatalf("unexpected error: %s", err)
}
}
func TestAlertingRule_QueryTemplateInLabels(t *testing.T) {
fq := &datasource.FakeQuerier{}
fakeGroup := Group{
Name: "TestQueryTemplateInLabels",
}
ar := &AlertingRule{
Name: "test_alert",
Labels: map[string]string{
"suppress_for_mass_alert": `{{ if (printf "ALERTS{alertname='SomeAlert', alertstate='firing', device='%s'} == 1" $labels.device | query) }}true{{ else }}false{{ end }}`,
},
Annotations: map[string]string{
"summary": "Test alert with query template in labels",
},
alerts: make(map[uint64]*notifier.Alert),
}
ar.GroupID = fakeGroup.GetID()
ar.q = fq
ar.state = &ruleState{
entries: make([]StateEntry, 10),
}
// Add a metric that should trigger the alert
fq.Add(metricWithValueAndLabels(t, 1, "device", "sda1"))
ts := time.Now()
_, err := ar.exec(context.TODO(), ts, 0)
if err != nil {
t.Fatalf("unexpected error with query template in labels: %s", err)
}
// Verify that the alert was created and the query template was executed
if len(ar.alerts) != 1 {
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
}
alert := ar.GetAlerts()[0]
suppressLabel, exists := alert.Labels["suppress_for_mass_alert"]
if !exists {
t.Fatalf("expected 'suppress_for_mass_alert' label to exist")
}
// The query template should have been executed (even if it returns false due to mock data)
if suppressLabel != "true" && suppressLabel != "false" {
t.Fatalf("expected 'suppress_for_mass_alert' label to be 'true' or 'false', got '%s'", suppressLabel)
}
}
// TestAlertingRule_ActiveAtPreservedInAnnotations ensures that the fix for
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9543 is preserved
// while allowing query templates in labels (https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9783)
func TestAlertingRule_ActiveAtPreservedInAnnotations(t *testing.T) {
// wrap into synctest because of time manipulations
synctest.Test(t, func(t *testing.T) {
fq := &datasource.FakeQuerier{}
ar := &AlertingRule{
Name: "TestActiveAtPreservation",
Labels: map[string]string{
"test_query_in_label": `{{ "static_value" }}`,
},
Annotations: map[string]string{
"description": "Alert active since {{ $activeAt }}",
},
alerts: make(map[uint64]*notifier.Alert),
q: fq,
state: &ruleState{
entries: make([]StateEntry, 10),
},
}
// Mock query result - return empty result to make suppress_for_mass_alert = false
// (no need to add anything to fq for empty result)
// Add a metric that should trigger the alert
fq.Add(metricWithValueAndLabels(t, 1, "instance", "server1"))
// First execution - creates new alert
ts1 := time.Now()
_, err := ar.exec(context.TODO(), ts1, 0)
if err != nil {
t.Fatalf("unexpected error on first exec: %s", err)
}
if len(ar.alerts) != 1 {
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
}
firstAlert := ar.GetAlerts()[0]
// Verify first execution: activeAt should be ts1 and annotation should reflect it
if !firstAlert.ActiveAt.Equal(ts1) {
t.Fatalf("expected activeAt to be %v, got %v", ts1, firstAlert.ActiveAt)
}
// Extract time from annotation (format will be like "Alert active since 2025-09-30 08:55:13.638551611 -0400 EDT m=+0.002928464")
expectedTimeStr := ts1.Format("2006-01-02 15:04:05")
if !strings.Contains(firstAlert.Annotations["description"], expectedTimeStr) {
t.Fatalf("first exec annotation should contain time %s, got: %s", expectedTimeStr, firstAlert.Annotations["description"])
}
// Second execution - should preserve activeAt in annotation
// Ensure different timestamp with different seconds
// sleep is non-blocking thanks to synctest
time.Sleep(2 * time.Second)
ts2 := time.Now()
_, err = ar.exec(context.TODO(), ts2, 0)
if err != nil {
t.Fatalf("unexpected error on second exec: %s", err)
}
// Get the alert again (should be the same alert)
if len(ar.alerts) != 1 {
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
}
secondAlert := ar.GetAlerts()[0]
// Critical test: activeAt should still be ts1, not ts2
if !secondAlert.ActiveAt.Equal(ts1) {
t.Fatalf("activeAt should be preserved as %v, but got %v", ts1, secondAlert.ActiveAt)
}
// Critical test: annotation should still contain ts1 time, not ts2
if !strings.Contains(secondAlert.Annotations["description"], expectedTimeStr) {
t.Fatalf("second exec annotation should still contain original time %s, got: %s", expectedTimeStr, secondAlert.Annotations["description"])
}
// Additional verification: annotation should NOT contain ts2 time
ts2TimeStr := ts2.Format("2006-01-02 15:04:05")
if strings.Contains(secondAlert.Annotations["description"], ts2TimeStr) {
t.Fatalf("annotation should NOT contain new eval time %s, got: %s", ts2TimeStr, secondAlert.Annotations["description"])
}
// Verify query template in labels still works (this would fail if query templates were broken)
if firstAlert.Labels["test_query_in_label"] != "static_value" {
t.Fatalf("expected test_query_in_label=static_value, got %s", firstAlert.Labels["test_query_in_label"])
}
})
}

View File

@@ -2,6 +2,7 @@ package rule
import (
"context"
"encoding/json"
"errors"
"flag"
"fmt"
@@ -18,15 +19,12 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
)
var (
ruleResultsLimit = flag.Int("rule.resultsLimit", 0, "Limits the number of alerts or recording results a single rule can produce. "+
"Can be overridden by the limit option under group if specified. "+
"If exceeded, the rule will be marked with an error and all its results will be discarded. "+
"0 means no limit.")
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
resendDelay = flag.Duration("rule.resendDelay", 0, "MiniMum amount of time to wait before resending an alert to notifier.")
@@ -38,8 +36,6 @@ var (
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries. "+
"For example, if lookback=1h then range from now() to now()-1h will be scanned.")
maxStartDelay = flag.Duration("group.maxStartDelay", 5*time.Minute, "Defines the max delay before starting the group evaluation. Group's start is artificially delayed for random duration on interval"+
" [0..min(--group.maxStartDelay, group.interval)]. This helps smoothing out the load on the configured datasource, so evaluations aren't executed too close to each other.")
)
// Group is an entity for grouping rules
@@ -116,6 +112,7 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
Name: cfg.Name,
File: cfg.File,
Interval: cfg.Interval.Duration(),
Limit: cfg.Limit,
Concurrency: cfg.Concurrency,
checksum: cfg.Checksum,
Params: cfg.Params,
@@ -132,11 +129,6 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
if g.Interval == 0 {
g.Interval = defaultInterval
}
if cfg.Limit != nil {
g.Limit = *cfg.Limit
} else {
g.Limit = *ruleResultsLimit
}
if g.Concurrency < 1 {
g.Concurrency = 1
}
@@ -297,7 +289,7 @@ func (g *Group) InterruptEval() {
}
}
// Close stops the group and its rules, unregisters group metrics
// Close stops the group and it's rules, unregisters group metrics
func (g *Group) Close() {
if g.doneCh == nil {
return
@@ -306,6 +298,10 @@ func (g *Group) Close() {
g.InterruptEval()
<-g.finishedCh
g.closeGroupMetrics()
}
func (g *Group) closeGroupMetrics() {
metrics.UnregisterSet(g.metrics.set, true)
}
@@ -331,13 +327,13 @@ func (g *Group) Init() {
}
// Start starts group's evaluation
func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
defer func() { close(g.finishedCh) }()
evalTS := time.Now()
// sleep random duration to spread group rules evaluation
// over maxStartDelay to reduce the load on datasource.
// over time in order to reduce load on datasource.
if !SkipRandSleepOnGroupStart {
sleepBeforeStart := g.delayBeforeStart(evalTS, *maxStartDelay)
sleepBeforeStart := delayBeforeStart(evalTS, g.GetID(), g.Interval, g.EvalOffset)
g.infof("will start in %v", sleepBeforeStart)
sleepTimer := time.NewTimer(sleepBeforeStart)
@@ -369,6 +365,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
e := &executor{
Rw: rw,
Notifiers: nts,
notifierHeaders: g.NotifierHeaders,
}
@@ -475,31 +472,32 @@ func (g *Group) UpdateWith(newGroup *Group) {
g.updateCh <- newGroup
}
// delayBeforeStart returns duration for delaying the evaluation start
// based on given ts and Group settings. The delay can't exceed maxDelay.
// maxDelay is ignored if g.EvalOffset != nil.
//
// Delaying is important to smooth out the load on the datasource when all groups start at the same time.
// delayBeforeStart calculates delay based on Group ID, so all groups will start at different moments of time.
func (g *Group) delayBeforeStart(ts time.Time, maxDelay time.Duration) time.Duration {
if g.EvalOffset != nil {
// if offset is specified, ignore the maxDelay and return a duration aligned with offset
currentOffsetPoint := ts.Truncate(g.Interval).Add(*g.EvalOffset)
// DeepCopy returns a deep copy of group
func (g *Group) DeepCopy() *Group {
g.mu.RLock()
data, _ := json.Marshal(g)
g.mu.RUnlock()
newG := Group{}
_ = json.Unmarshal(data, &newG)
newG.Rules = g.Rules
newG.id = g.id
return &newG
}
// if offset is specified, delayBeforeStart returns a duration to help aligning timestamp with offset;
// otherwise, it returns a random duration between [0..interval] based on group key.
func delayBeforeStart(ts time.Time, key uint64, interval time.Duration, offset *time.Duration) time.Duration {
if offset != nil {
currentOffsetPoint := ts.Truncate(interval).Add(*offset)
if currentOffsetPoint.Before(ts) {
// wait until the next offset point
return currentOffsetPoint.Add(g.Interval).Sub(ts)
return currentOffsetPoint.Add(interval).Sub(ts)
}
return currentOffsetPoint.Sub(ts)
}
// otherwise, return a random duration between [0..min(interval, maxDelay)] based on group ID
interval := g.Interval
if interval > maxDelay {
// artificially limit interval, so groups with big intervals could start sooner.
interval = maxDelay
}
var randSleep time.Duration
randSleep = time.Duration(float64(interval) * (float64(g.GetID()) / (1 << 64)))
randSleep = time.Duration(float64(interval) * (float64(key) / (1 << 64)))
sleepOffset := time.Duration(ts.UnixNano() % interval.Nanoseconds())
if randSleep < sleepOffset {
randSleep += interval
@@ -561,13 +559,15 @@ func (g *Group) Replay(start, end time.Time, rw remotewrite.RWClient, maxDataPoi
if !disableProgressBar {
bar = pb.StartNew(iterations * len(g.Rules))
}
for i := range g.Rules {
rule := g.Rules[i]
for _, r := range g.Rules {
sem <- struct{}{}
wg.Go(func() {
res <- replayRuleRange(rule, ri, bar, rw, replayRuleRetryAttempts, ruleEvaluationConcurrency)
wg.Add(1)
go func(r Rule, ri rangeIterator) {
// pass ri as a copy, so it can be modified within the replayRuleRange
res <- replayRuleRange(r, ri, bar, rw, replayRuleRetryAttempts, ruleEvaluationConcurrency)
<-sem
})
wg.Done()
}(r, ri)
}
wg.Wait()
@@ -597,10 +597,10 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
res := make(chan int, int(ri.end.Sub(ri.start)/ri.step)+1)
for ri.next() {
sem <- struct{}{}
start := ri.s
end := ri.e
wg.Go(func() {
n, err := replayRule(r, start, end, rw, replayRuleRetryAttempts)
wg.Add(1)
go func(s, e time.Time) {
n, err := replayRule(r, s, e, rw, replayRuleRetryAttempts)
if err != nil {
logger.Fatalf("rule %q: %s", r, err)
}
@@ -609,7 +609,8 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
}
res <- n
<-sem
})
wg.Done()
}(ri.s, ri.e)
}
wg.Wait()
close(res)
@@ -623,9 +624,10 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
}
// ExecOnce evaluates all the rules under group for once with given timestamp.
func (g *Group) ExecOnce(ctx context.Context, rw remotewrite.RWClient, evalTS time.Time) chan error {
func (g *Group) ExecOnce(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, evalTS time.Time) chan error {
e := &executor{
Rw: rw,
Notifiers: nts,
notifierHeaders: g.NotifierHeaders,
}
if len(g.Rules) < 1 {
@@ -700,6 +702,7 @@ func (g *Group) getEvalDelay() time.Duration {
// executor contains group's notify and rw configs
type executor struct {
Notifiers func() []notifier.Notifier
notifierHeaders map[string]string
Rw remotewrite.RWClient
@@ -720,13 +723,14 @@ func (e *executor) execConcurrently(ctx context.Context, rules []Rule, ts time.T
sem := make(chan struct{}, concurrency)
go func() {
wg := sync.WaitGroup{}
for i := range rules {
rule := rules[i]
for _, r := range rules {
sem <- struct{}{}
wg.Go(func() {
res <- e.exec(ctx, rule, ts, resolveDuration, limit)
wg.Add(1)
go func(r Rule) {
res <- e.exec(ctx, r, ts, resolveDuration, limit)
<-sem
})
wg.Done()
}(r)
}
wg.Wait()
close(res)
@@ -780,6 +784,17 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
return nil
}
errGr := notifier.Send(ctx, alerts, e.notifierHeaders)
wg := sync.WaitGroup{}
errGr := new(vmalertutil.ErrGroup)
for _, nt := range e.Notifiers() {
wg.Add(1)
go func(nt notifier.Notifier) {
if err := nt.Send(ctx, alerts, e.notifierHeaders); err != nil {
errGr.Add(fmt.Errorf("rule %q: failed to send alerts to addr %q: %w", r, nt.Addr(), err))
}
wg.Done()
}(nt)
}
wg.Wait()
return errGr.Err()
}

View File

@@ -262,7 +262,7 @@ func TestUpdateDuringRandSleep(t *testing.T) {
updateCh: make(chan *Group),
}
g.Init()
go g.Start(context.Background(), nil, nil)
go g.Start(context.Background(), nil, nil, nil)
rule1 := AlertingRule{
Name: "jobDown",
@@ -346,8 +346,7 @@ func TestGroupStart(t *testing.T) {
}
fs := &datasource.FakeQuerier{}
fn, cleanup := notifier.InitFakeNotifier()
defer cleanup()
fn := &notifier.FakeNotifier{}
const evalInterval = time.Millisecond
g := NewGroup(groups[0], fs, evalInterval, map[string]string{"cluster": "east-1"})
@@ -396,7 +395,7 @@ func TestGroupStart(t *testing.T) {
fs.Add(m2)
g.Init()
go func() {
g.Start(context.Background(), nil, fs)
g.Start(context.Background(), func() []notifier.Notifier { return []notifier.Notifier{fn} }, nil, fs)
close(finished)
}()
@@ -473,10 +472,15 @@ func TestFaultyNotifier(t *testing.T) {
r := newTestAlertingRule("instant", 0)
r.q = fq
fn, cleanup := notifier.InitFakeNotifier()
defer cleanup()
e := &executor{}
fn := &notifier.FakeNotifier{}
e := &executor{
Notifiers: func() []notifier.Notifier {
return []notifier.Notifier{
&notifier.FaultyNotifier{},
fn,
}
},
}
delay := 5 * time.Second
ctx, cancel := context.WithTimeout(context.Background(), delay)
defer cancel()
@@ -549,7 +553,7 @@ func TestCloseWithEvalInterruption(t *testing.T) {
g := NewGroup(groups[0], fq, evalInterval, nil)
g.Init()
go g.Start(context.Background(), nil, nil)
go g.Start(context.Background(), nil, nil, nil)
time.Sleep(evalInterval * 20)
@@ -567,10 +571,9 @@ func TestCloseWithEvalInterruption(t *testing.T) {
func TestGroupStartDelay(t *testing.T) {
g := &Group{}
g.id = uint64(math.MaxUint64 / 10)
// interval of 5min and key generate a static delay of 30s
g.Interval = time.Minute * 5
maxDelay := time.Minute * 5
key := uint64(math.MaxUint64 / 10)
f := func(atS, expS string) {
t.Helper()
@@ -582,7 +585,7 @@ func TestGroupStartDelay(t *testing.T) {
if err != nil {
t.Fatal(err)
}
delay := g.delayBeforeStart(at, maxDelay)
delay := delayBeforeStart(at, key, g.Interval, g.EvalOffset)
gotStart := at.Add(delay)
if expTS != gotStart {
t.Fatalf("expected to get %v; got %v instead", expTS, gotStart)
@@ -603,15 +606,6 @@ func TestGroupStartDelay(t *testing.T) {
f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:03:00.000+00:00")
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00")
f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00")
maxDelay = time.Minute * 1
g.EvalOffset = nil
// test group with maxDelay, and offset disabled
f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:00:06.000+00:00")
f("2023-01-01T00:00:01.000+00:00", "2023-01-01T00:00:06.000+00:00")
f("2023-01-01T00:00:06.100+00:00", "2023-01-01T00:01:06.000+00:00")
f("2023-01-01T00:00:11.000+00:00", "2023-01-01T00:01:06.000+00:00")
}
func TestGetPrometheusReqTimestamp(t *testing.T) {

View File

@@ -2,7 +2,6 @@ package rule
import (
"context"
"errors"
"fmt"
"strings"
"time"
@@ -82,37 +81,6 @@ func (rr *RecordingRule) ID() uint64 {
return rr.RuleID
}
// ToAPI returns ApiRule representation of rr
func (rr *RecordingRule) ToAPI() ApiRule {
state := rr.state
lastState := state.getLast()
r := ApiRule{
Type: TypeRecording,
DatasourceType: rr.Type.String(),
Name: rr.Name,
Query: rr.Expr,
Labels: rr.Labels,
LastEvaluation: lastState.Time,
EvaluationTime: lastState.Duration.Seconds(),
Health: "ok",
LastSamples: lastState.Samples,
LastSeriesFetched: lastState.SeriesFetched,
MaxUpdates: state.size(),
Updates: state.getAll(),
// encode as strings to avoid rounding
ID: fmt.Sprintf("%d", rr.ID()),
GroupID: fmt.Sprintf("%d", rr.GroupID),
GroupName: rr.GroupName,
File: rr.File,
}
if lastState.Err != nil {
r.LastError = lastState.Err.Error()
r.Health = "err"
}
return r
}
// NewRecordingRule creates a new RecordingRule
func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule {
debug := group.Debug
@@ -198,7 +166,7 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
defer func() {
rr.state.add(curState)
if curState.Err != nil && !errors.Is(curState.Err, context.Canceled) {
if curState.Err != nil {
rr.metrics.errors.Inc()
}
}()
@@ -237,8 +205,7 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
Labels: stringToLabels(k),
Samples: []prompb.Sample{
{Value: decimal.StaleNaN, Timestamp: ts.UnixNano() / 1e6},
},
})
}})
}
rr.lastEvaluation = curEvaluation
return tss, nil
@@ -293,11 +260,6 @@ func (rr *RecordingRule) toTimeSeries(m datasource.Metric) prompb.TimeSeries {
}
// add extra labels configured by user
for k := range rr.Labels {
// do not add label with empty value, since it has no meaning.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984
if rr.Labels[k] == "" {
continue
}
existingLabel := promrelabel.GetLabelByName(m.Labels, k)
if existingLabel != nil { // there is a conflict between extra and existing label
if existingLabel.Value == rr.Labels[k] {

View File

@@ -21,8 +21,6 @@ type Rule interface {
// ID returns unique ID that may be used for
// identifying this Rule among others.
ID() uint64
// ToAPI returns ApiRule representation of Rule
ToAPI() ApiRule
// exec executes the rule with given context at the given timestamp and limit.
// returns an err if number of resulting time series exceeds the limit.
exec(ctx context.Context, ts time.Time, limit int) ([]prompb.TimeSeries, error)
@@ -70,6 +68,39 @@ type StateEntry struct {
Curl string `json:"curl"`
}
// GetLastEntry returns latest stateEntry of rule
func GetLastEntry(r Rule) StateEntry {
if rule, ok := r.(*AlertingRule); ok {
return rule.state.getLast()
}
if rule, ok := r.(*RecordingRule); ok {
return rule.state.getLast()
}
return StateEntry{}
}
// GetRuleStateSize returns size of rule stateEntry
func GetRuleStateSize(r Rule) int {
if rule, ok := r.(*AlertingRule); ok {
return rule.state.size()
}
if rule, ok := r.(*RecordingRule); ok {
return rule.state.size()
}
return 0
}
// GetAllRuleState returns rule entire stateEntries
func GetAllRuleState(r Rule) []StateEntry {
if rule, ok := r.(*AlertingRule); ok {
return rule.state.getAll()
}
if rule, ok := r.(*RecordingRule); ok {
return rule.state.getAll()
}
return []StateEntry{}
}
func (s *ruleState) size() int {
s.RLock()
defer s.RUnlock()

View File

@@ -34,12 +34,11 @@ body {
padding-top: 4.5rem;
}
.vm-group {
.group-items {
cursor: pointer;
padding: 5px;
margin-top: 5px;
position: relative;
display: none;
}
.btn svg, .dropdown-item svg {
@@ -56,22 +55,14 @@ body {
height: 38px;
}
.vm-item:not(.vm-found) {
display: none;
.group-items:not(:has(.sub-item:not(.d-none))) {
display: none !important;
}
.vm-group:has(.vm-item:is(.vm-found)), .vm-group:is(.vm-found) {
display: flex;
}
.vm-group:hover {
.group-items:hover {
background-color: #f8f9fa!important;
}
.vm-group:is(.vm-found) .vm-item {
display: table-row;
}
.table {
table-layout: fixed;
}
@@ -120,9 +111,3 @@ textarea.curl-area {
.w-60 {
width: 60%;
}
.annotations {
white-space: pre-wrap;
color: gray;
word-wrap: break-word;
}

View File

@@ -65,34 +65,32 @@ function getParamURL(key) {
return url.searchParams.get(key)
}
function matchText(search, item) {
const text = item.innerText.toLowerCase();
return text.indexOf(search) >= 0;
}
function filterRules(searchPhrase) {
document.querySelectorAll('.vm-group').forEach((group) => {
if (!searchPhrase) {
group.classList.add('vm-found');
return;
}
for (const item of group.querySelectorAll('.vm-group-search')) {
if (matchText(searchPhrase, item)) {
group.classList.add('vm-found');
return;
document.querySelectorAll('.sub-items').forEach((rules) => {
let found = false;
rules.querySelectorAll('.sub-item').forEach((rule) => {
if (searchPhrase) {
const ruleName = rule.innerText.toLowerCase();
const matches = []
const hasValue = ruleName.indexOf(searchPhrase) >= 0;
rule.querySelectorAll('.label').forEach((label) => {
const text = label.innerText.toLowerCase();
if (text.indexOf(searchPhrase) >= 0) {
matches.push(text);
}
});
if (!matches.length && !hasValue) {
rule.classList.add('d-none');
return;
}
}
}
group.classList.remove('vm-found');
for (const item of group.querySelectorAll('.vm-item')) {
if (matchText(searchPhrase, item)) {
item.classList.add('vm-found');
continue;
}
if (Array.from(item.querySelectorAll('.label')).find(l => matchText(searchPhrase, l))) {
item.classList.add('vm-found');
continue;
}
item.classList.remove('vm-found');
rule.classList.remove('d-none');
found = true;
});
if (found && searchPhrase || !searchPhrase) {
rules.classList.remove('d-none');
} else {
rules.classList.add('d-none');
}
});
}

View File

@@ -485,12 +485,6 @@ func templateFuncs() textTpl.FuncMap {
/* Helpers */
// now returns the Unix timestamp in seconds at the time of the template evaluation.
// For example: {{ (now | toTime).Sub $activeAt }} will return the duration the alert has been active.
"now": func() float64 {
return float64(time.Now().Unix())
},
// Converts a list of objects to a map with keys arg0, arg1 etc.
// This is intended to allow multiple arguments to be passed to templates.
"args": func(args ...any) map[string]any {

View File

@@ -29,9 +29,7 @@ var (
{"api/v1/rules", "list all loaded groups and rules"},
{"api/v1/alerts", "list all active alerts"},
{"api/v1/notifiers", "list all notifiers"},
{fmt.Sprintf("api/v1/alert?%s=<int>&%s=<int>", rule.ParamGroupID, rule.ParamAlertID), "get alert status by group and alert ID"},
{fmt.Sprintf("api/v1/rule?%s=<int>&%s=<int>", rule.ParamGroupID, rule.ParamRuleID), "get rule status by group and rule ID"},
{fmt.Sprintf("api/v1/group?%s=<int>", rule.ParamGroupID), "get group status by group ID"},
{fmt.Sprintf("api/v1/alert?%s=<int>&%s=<int>", paramGroupID, paramAlertID), "get alert status by group and alert ID"},
}
systemLinks = [][2]string{
{"vmalert/groups", "UI"},
@@ -47,8 +45,8 @@ var (
{Name: "Docs", URL: "https://docs.victoriametrics.com/victoriametrics/vmalert/"},
}
ruleTypeMap = map[string]string{
"alert": rule.TypeAlerting,
"record": rule.TypeRecording,
"alert": ruleTypeAlerting,
"record": ruleTypeRecording,
}
)
@@ -114,7 +112,7 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
case "/rules":
// Grafana makes an extra request to `/rules`
// handler in addition to `/api/v1/rules` calls in alerts UI
var data []*rule.ApiGroup
var data []*apiGroup
rf, err := newRulesFilter(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
@@ -180,14 +178,14 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
w.Write(data)
return true
case "/vmalert/api/v1/rule", "/api/v1/rule":
apiRule, err := rh.getRule(r)
rule, err := rh.getRule(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return true
}
rwu := rule.ApiRuleWithUpdates{
ApiRule: apiRule,
StateUpdates: apiRule.Updates,
rwu := apiRuleWithUpdates{
apiRule: rule,
StateUpdates: rule.Updates,
}
data, err := json.Marshal(rwu)
if err != nil {
@@ -197,20 +195,6 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
w.Header().Set("Content-Type", "application/json")
w.Write(data)
return true
case "/vmalert/api/v1/group", "/api/v1/group":
group, err := rh.getGroup(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return true
}
data, err := json.Marshal(group)
if err != nil {
httpserver.Errorf(w, r, "failed to marshal group: %s", err)
return true
}
w.Header().Set("Content-Type", "application/json")
w.Write(data)
return true
case "/-/reload":
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) {
return true
@@ -225,42 +209,30 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
}
}
func (rh *requestHandler) getGroup(r *http.Request) (*rule.ApiGroup, error) {
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
func (rh *requestHandler) getRule(r *http.Request) (apiRule, error) {
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
if err != nil {
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
return apiRule{}, fmt.Errorf("failed to read %q param: %w", paramGroupID, err)
}
obj, err := rh.m.groupAPI(groupID)
ruleID, err := strconv.ParseUint(r.FormValue(paramRuleID), 10, 64)
if err != nil {
return nil, errResponse(err, http.StatusNotFound)
}
return obj, nil
}
func (rh *requestHandler) getRule(r *http.Request) (rule.ApiRule, error) {
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
if err != nil {
return rule.ApiRule{}, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
}
ruleID, err := strconv.ParseUint(r.FormValue(rule.ParamRuleID), 10, 64)
if err != nil {
return rule.ApiRule{}, fmt.Errorf("failed to read %q param: %w", rule.ParamRuleID, err)
return apiRule{}, fmt.Errorf("failed to read %q param: %w", paramRuleID, err)
}
obj, err := rh.m.ruleAPI(groupID, ruleID)
if err != nil {
return rule.ApiRule{}, errResponse(err, http.StatusNotFound)
return apiRule{}, errResponse(err, http.StatusNotFound)
}
return obj, nil
}
func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, error) {
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
func (rh *requestHandler) getAlert(r *http.Request) (*apiAlert, error) {
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
if err != nil {
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
return nil, fmt.Errorf("failed to read %q param: %w", paramGroupID, err)
}
alertID, err := strconv.ParseUint(r.FormValue(rule.ParamAlertID), 10, 64)
alertID, err := strconv.ParseUint(r.FormValue(paramAlertID), 10, 64)
if err != nil {
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamAlertID, err)
return nil, fmt.Errorf("failed to read %q param: %w", paramAlertID, err)
}
a, err := rh.m.alertAPI(groupID, alertID)
if err != nil {
@@ -272,7 +244,7 @@ func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, error) {
type listGroupsResponse struct {
Status string `json:"status"`
Data struct {
Groups []*rule.ApiGroup `json:"groups"`
Groups []*apiGroup `json:"groups"`
} `json:"data"`
}
@@ -338,19 +310,19 @@ func (rf *rulesFilter) matchesGroup(group *rule.Group) bool {
return true
}
func (rh *requestHandler) groups(rf *rulesFilter) []*rule.ApiGroup {
func (rh *requestHandler) groups(rf *rulesFilter) []*apiGroup {
rh.m.groupsMu.RLock()
defer rh.m.groupsMu.RUnlock()
groups := make([]*rule.ApiGroup, 0)
groups := make([]*apiGroup, 0)
for _, group := range rh.m.groups {
if !rf.matchesGroup(group) {
continue
}
g := group.ToAPI()
g := groupToAPI(group)
// the returned list should always be non-nil
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4221
filteredRules := make([]rule.ApiRule, 0)
filteredRules := make([]apiRule, 0)
for _, rule := range g.Rules {
if rf.ruleType != "" && rf.ruleType != rule.Type {
continue
@@ -378,7 +350,7 @@ func (rh *requestHandler) groups(rf *rulesFilter) []*rule.ApiGroup {
groups = append(groups, g)
}
// sort list of groups for deterministic output
slices.SortFunc(groups, func(a, b *rule.ApiGroup) int {
slices.SortFunc(groups, func(a, b *apiGroup) int {
if a.Name != b.Name {
return strings.Compare(a.Name, b.Name)
}
@@ -403,32 +375,32 @@ func (rh *requestHandler) listGroups(rf *rulesFilter) ([]byte, error) {
type listAlertsResponse struct {
Status string `json:"status"`
Data struct {
Alerts []*rule.ApiAlert `json:"alerts"`
Alerts []*apiAlert `json:"alerts"`
} `json:"data"`
}
func (rh *requestHandler) groupAlerts() []rule.GroupAlerts {
func (rh *requestHandler) groupAlerts() []groupAlerts {
rh.m.groupsMu.RLock()
defer rh.m.groupsMu.RUnlock()
var gAlerts []rule.GroupAlerts
for _, group := range rh.m.groups {
var alerts []*rule.ApiAlert
g := group.ToAPI()
var gAlerts []groupAlerts
for _, g := range rh.m.groups {
var alerts []*apiAlert
for _, r := range g.Rules {
if r.Type != rule.TypeAlerting {
a, ok := r.(*rule.AlertingRule)
if !ok {
continue
}
alerts = append(alerts, r.Alerts...)
alerts = append(alerts, ruleToAPIAlert(a)...)
}
if len(alerts) > 0 {
gAlerts = append(gAlerts, rule.GroupAlerts{
Group: g,
gAlerts = append(gAlerts, groupAlerts{
Group: groupToAPI(g),
Alerts: alerts,
})
}
}
slices.SortFunc(gAlerts, func(a, b rule.GroupAlerts) int {
slices.SortFunc(gAlerts, func(a, b groupAlerts) int {
return strings.Compare(a.Group.Name, b.Group.Name)
})
return gAlerts
@@ -439,22 +411,22 @@ func (rh *requestHandler) listAlerts(rf *rulesFilter) ([]byte, error) {
defer rh.m.groupsMu.RUnlock()
lr := listAlertsResponse{Status: "success"}
lr.Data.Alerts = make([]*rule.ApiAlert, 0)
lr.Data.Alerts = make([]*apiAlert, 0)
for _, group := range rh.m.groups {
if !rf.matchesGroup(group) {
continue
}
g := group.ToAPI()
for _, r := range g.Rules {
if r.Type != rule.TypeAlerting {
for _, r := range group.Rules {
a, ok := r.(*rule.AlertingRule)
if !ok {
continue
}
lr.Data.Alerts = append(lr.Data.Alerts, r.Alerts...)
lr.Data.Alerts = append(lr.Data.Alerts, ruleToAPIAlert(a)...)
}
}
// sort list of alerts for deterministic output
slices.SortFunc(lr.Data.Alerts, func(a, b *rule.ApiAlert) int {
slices.SortFunc(lr.Data.Alerts, func(a, b *apiAlert) int {
return strings.Compare(a.ID, b.ID)
})
@@ -471,7 +443,7 @@ func (rh *requestHandler) listAlerts(rf *rulesFilter) ([]byte, error) {
type listNotifiersResponse struct {
Status string `json:"status"`
Data struct {
Notifiers []*notifier.ApiNotifier `json:"notifiers"`
Notifiers []*apiNotifier `json:"notifiers"`
} `json:"data"`
}
@@ -479,20 +451,19 @@ func (rh *requestHandler) listNotifiers() ([]byte, error) {
targets := notifier.GetTargets()
lr := listNotifiersResponse{Status: "success"}
lr.Data.Notifiers = make([]*notifier.ApiNotifier, 0)
lr.Data.Notifiers = make([]*apiNotifier, 0)
for protoName, protoTargets := range targets {
nr := &notifier.ApiNotifier{
Kind: protoName,
Targets: make([]*notifier.ApiTarget, 0, len(protoTargets)),
notifier := &apiNotifier{
Kind: string(protoName),
Targets: make([]*apiTarget, 0, len(protoTargets)),
}
for _, target := range protoTargets {
nr.Targets = append(nr.Targets, &notifier.ApiTarget{
Address: target.Addr(),
Labels: target.Labels.ToMap(),
LastError: target.LastError(),
notifier.Targets = append(notifier.Targets, &apiTarget{
Address: target.Addr(),
Labels: target.Labels.ToMap(),
})
}
lr.Data.Notifiers = append(lr.Data.Notifiers, nr)
lr.Data.Notifiers = append(lr.Data.Notifiers, notifier)
}
b, err := json.Marshal(lr)

View File

@@ -8,7 +8,6 @@
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/tpl"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
) %}
{% func Controls(prefix, currentIcon, currentText string, icons, filters map[string]string, search bool) %}
@@ -94,7 +93,7 @@
{%= tpl.Footer(r) %}
{% endfunc %}
{% func ListGroups(r *http.Request, groups []*rule.ApiGroup, filter string) %}
{% func ListGroups(r *http.Request, groups []*apiGroup, filter string) %}
{%code
prefix := vmalertutil.Prefix(r.URL.Path)
filters := map[string]string{
@@ -114,17 +113,14 @@
{%= Controls(prefix, currentIcon, currentText, icons, filters, true) %}
{% if len(groups) > 0 %}
{% for _, g := range groups %}
<div id="group-{%s g.ID %}" class="w-100 border-0 flex-column vm-group{% if g.Unhealthy > 0 %} alert-danger{% endif %}">
<div id="group-{%s g.ID %}" class="d-flex w-100 border-0 flex-column group-items{% if g.Unhealthy > 0 %} alert-danger{% endif %}">
<span class="d-flex justify-content-between">
<a
class="vm-group-search"
href="#group-{%s g.ID %}"
>{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %} (every {%f.0 g.Interval %}s) #</a>
<a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %} (every {%f.0 g.Interval %}s) #</a>
<span
class="flex-grow-1 d-flex justify-content-end"
role="button"
data-bs-toggle="collapse"
data-bs-target="#item-{%s g.ID %}"
data-bs-target="#sub-{%s g.ID %}"
>
<span class="d-flex gap-2">
{% if g.Unhealthy > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d g.Unhealthy %}</span> {% endif %}
@@ -137,9 +133,9 @@
class="d-flex flex-column row-gap-2 mb-2"
role="button"
data-bs-toggle="collapse"
data-bs-target="#item-{%s g.ID %}"
data-bs-target="#sub-{%s g.ID %}"
>
<span class="fs-6 text-start vm-group-search w-100 fw-lighter">{%s g.File %}</span>
<span class="fs-6 text-start w-100 fw-lighter">{%s g.File %}</span>
{% if len(g.Params) > 0 %}
<span class="fs-6 text-start w-100 d-flex justify-content-between fw-lighter">
<span>Extra params</span>
@@ -161,7 +157,7 @@
</span>
{% endif %}
</span>
<div class="collapse" id="item-{%s g.ID %}">
<div class="collapse sub-items" id="sub-{%s g.ID %}">
<table class="table table-striped table-hover table-sm">
<thead>
<tr>
@@ -172,7 +168,7 @@
</thead>
<tbody>
{% for _, r := range g.Rules %}
<tr class="vm-item{% if r.LastError != "" %} alert-danger{% endif %}">
<tr class="sub-item{% if r.LastError != "" %} alert-danger{% endif %}">
<td>
<div class="row">
<div class="col-12 mb-2">
@@ -209,12 +205,7 @@
</div>
</td>
<td class="text-center">{%d r.LastSamples %}</td>
<td class="text-center">{% if r.LastEvaluation.IsZero() %}
Never
{% else %}
{%f.3 time.Since(r.LastEvaluation).Seconds() %}s ago
{% endif %}
</td>
<td class="text-center">{%f.3 time.Since(r.LastEvaluation).Seconds() %}s ago</td>
</tr>
{% endfor %}
</tbody>
@@ -231,7 +222,7 @@
{% endfunc %}
{% func ListAlerts(r *http.Request, groupAlerts []rule.GroupAlerts) %}
{% func ListAlerts(r *http.Request, groupAlerts []groupAlerts) %}
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
{%= tpl.Header(r, navItems, "Alerts", getLastConfigError()) %}
{%= Controls(prefix, "", "", nil, nil, true) %}
@@ -240,7 +231,7 @@
{%code
g := ga.Group
var keys []string
alertsByRule := make(map[string][]*rule.ApiAlert)
alertsByRule := make(map[string][]*apiAlert)
for _, alert := range ga.Alerts {
if len(alertsByRule[alert.RuleID]) < 1 {
keys = append(keys, alert.RuleID)
@@ -249,14 +240,14 @@
}
sort.Strings(keys)
%}
<div class="w-100 flex-column vm-group alert-danger">
<div class="d-flex w-100 flex-column group-items alert-danger">
<span id="group-{%s g.ID %}" class="d-flex justify-content-between">
<a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %}</a>
<span
class="flex-grow-1 d-flex justify-content-end"
role="button"
data-bs-toggle="collapse"
data-bs-target="#item-{%s g.ID %}"
data-bs-target="#sub-{%s g.ID %}"
>
<span class="badge bg-danger" title="Number of active alerts">{%d len(ga.Alerts) %}</span>
</span>
@@ -266,10 +257,10 @@
class="fs-6 text-start w-100 fw-lighter"
role="button"
data-bs-toggle="collapse"
data-bs-target="#item-{%s g.ID %}"
data-bs-target="#sub-{%s g.ID %}"
>{%s g.File %}</span>
</span>
<div class="collapse" id="item-{%s g.ID %}">
<div class="collapse sub-items" id="sub-{%s g.ID %}">
{% for _, ruleID := range keys %}
{%code
defaultAR := alertsByRule[ruleID][0]
@@ -280,7 +271,7 @@
sort.Strings(labelKeys)
%}
<br>
<div class="vm-item">
<div class="sub-item">
<b>alert:</b> {%s defaultAR.Name %} ({%d len(alertsByRule[ruleID]) %})
| <span><a target="_blank" href="{%s defaultAR.SourceLink %}">Source</a></span>
<br>
@@ -345,20 +336,20 @@
typeK, ns := keys[i], targets[notifier.TargetType(keys[i])]
count := len(ns)
%}
<div class="w-100 flex-column vm-group">
<div class="d-flex w-100 flex-column group-items">
<span class="d-flex justify-content-between" id="group-{%s typeK %}">
<a href="#group-{%s typeK %}">{%s typeK %} ({%d count %})</a>
<span
class="flex-grow-1"
role="button"
data-bs-toggle="collapse"
data-bs-target="#item-{%s typeK %}"
data-bs-target="#sub-{%s typeK %}"
></span>
</span>
<div id="item-{%s typeK %}" class="collapse show">
<div id="sub-{%s typeK %}" class="collapse show sub-items">
<table class="table table-striped table-hover table-sm">
<thead>
<tr class="vm-item">
<tr class="sub-item">
<th scope="col">Labels</th>
<th scope="col">Address</th>
</tr>
@@ -387,7 +378,7 @@
{%= tpl.Footer(r) %}
{% endfunc %}
{% func Alert(r *http.Request, alert *rule.ApiAlert) %}
{% func Alert(r *http.Request, alert *apiAlert) %}
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
{%code
@@ -443,7 +434,7 @@
<div class="col">
{% for _, k := range annotationKeys %}
<b>{%s k %}:</b><br>
<p class="annotations">{%s alert.Annotations[k] %}</p>
<p>{%s alert.Annotations[k] %}</p>
{% endfor %}
</div>
</div>
@@ -473,7 +464,7 @@
{% endfunc %}
{% func RuleDetails(r *http.Request, rule rule.ApiRule) %}
{% func RuleDetails(r *http.Request, rule apiRule) %}
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
{%code
@@ -557,7 +548,7 @@
<div class="col">
{% for _, k := range annotationKeys %}
<b>{%s k %}:</b><br>
<p class="annotations">{%s rule.Annotations[k] %}</p>
<p>{%s rule.Annotations[k] %}</p>
{% endfor %}
</div>
</div>
@@ -602,11 +593,11 @@
<table class="table table-striped table-hover table-sm">
<thead>
<tr>
<th scope="col" title="The time when the rule was executed">Updated at</th>
<th scope="col" title="The time when event was created">Updated at</th>
<th scope="col" class="w-10 text-center" title="How many series expression returns. Each series will represent an alert.">Series returned</th>
{% if seriesFetchedEnabled %}<th scope="col" class="w-10 text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>{% endif %}
<th scope="col" class="w-10 text-center" title="How many seconds request took">Duration</th>
<th scope="col" class="text-center" title="The time used in execution query request">Execution timestamp</th>
<th scope="col" class="text-center" title="Time used for rule execution">Executed at</th>
<th scope="col" class="text-center" title="cURL command with request example">cURL</th>
</tr>
</thead>
@@ -658,7 +649,7 @@
<span class="badge bg-warning text-dark" title="This firing state is kept because of `keep_firing_for`">stabilizing</span>
{% endfunc %}
{% func seriesFetchedWarn(prefix string, r rule.ApiRule) %}
{% func seriesFetchedWarn(prefix string, r apiRule) %}
{% if isNoMatch(r) %}
<svg
data-bs-toggle="tooltip"
@@ -672,7 +663,7 @@
{% endfunc %}
{%code
func isNoMatch (r rule.ApiRule) bool {
func isNoMatch (r apiRule) bool {
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
}
%}

File diff suppressed because it is too large Load Diff

View File

@@ -23,12 +23,8 @@ func TestHandler(t *testing.T) {
Timestamps: []int64{0},
})
m := &manager{groups: map[uint64]*rule.Group{}}
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
var ar *rule.AlertingRule
var rr *rule.RecordingRule
var groupIDs []uint64
for _, dsType := range []string{"prometheus", "", "graphite"} {
g := rule.NewGroup(config.Group{
Name: "group",
@@ -48,10 +44,8 @@ func TestHandler(t *testing.T) {
}, fq, 1*time.Minute, nil)
ar = g.Rules[0].(*rule.AlertingRule)
rr = g.Rules[1].(*rule.RecordingRule)
g.ExecOnce(context.Background(), nil, time.Time{})
id := g.CreateID()
m.groups[id] = g
groupIDs = append(groupIDs, id)
g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, nil, time.Time{})
m.groups[g.CreateID()] = g
}
rh := &requestHandler{m: m}
@@ -88,22 +82,22 @@ func TestHandler(t *testing.T) {
})
t.Run("/vmalert/rule", func(t *testing.T) {
a := ar.ToAPI()
a := ruleToAPI(ar)
getResp(t, ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
r := rr.ToAPI()
r := ruleToAPI(rr)
getResp(t, ts.URL+"/vmalert/"+r.WebLink(), nil, 200)
})
t.Run("/vmalert/alert", func(t *testing.T) {
alerts := ar.AlertsToAPI()
alerts := ruleToAPIAlert(ar)
for _, a := range alerts {
getResp(t, ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
}
})
t.Run("/vmalert/rule?badParam", func(t *testing.T) {
params := fmt.Sprintf("?%s=0&%s=1", rule.ParamGroupID, rule.ParamRuleID)
params := fmt.Sprintf("?%s=0&%s=1", paramGroupID, paramRuleID)
getResp(t, ts.URL+"/vmalert/rule"+params, nil, 404)
params = fmt.Sprintf("?%s=1&%s=0", rule.ParamGroupID, rule.ParamRuleID)
params = fmt.Sprintf("?%s=1&%s=0", paramGroupID, paramRuleID)
getResp(t, ts.URL+"/vmalert/rule"+params, nil, 404)
})
@@ -130,14 +124,14 @@ func TestHandler(t *testing.T) {
}
})
t.Run("/api/v1/alert?alertID&groupID", func(t *testing.T) {
expAlert := rule.NewAlertAPI(ar, ar.GetAlerts()[0])
alert := &rule.ApiAlert{}
expAlert := newAlertAPI(ar, ar.GetAlerts()[0])
alert := &apiAlert{}
getResp(t, ts.URL+"/"+expAlert.APILink(), alert, 200)
if !reflect.DeepEqual(alert, expAlert) {
t.Fatalf("expected %v is equal to %v", alert, expAlert)
}
alert = &rule.ApiAlert{}
alert = &apiAlert{}
getResp(t, ts.URL+"/vmalert/"+expAlert.APILink(), alert, 200)
if !reflect.DeepEqual(alert, expAlert) {
t.Fatalf("expected %v is equal to %v", alert, expAlert)
@@ -145,16 +139,16 @@ func TestHandler(t *testing.T) {
})
t.Run("/api/v1/alert?badParams", func(t *testing.T) {
params := fmt.Sprintf("?%s=0&%s=1", rule.ParamGroupID, rule.ParamAlertID)
params := fmt.Sprintf("?%s=0&%s=1", paramGroupID, paramAlertID)
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 404)
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 404)
params = fmt.Sprintf("?%s=1&%s=0", rule.ParamGroupID, rule.ParamAlertID)
params = fmt.Sprintf("?%s=1&%s=0", paramGroupID, paramAlertID)
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 404)
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 404)
// bad request, alertID is missing
params = fmt.Sprintf("?%s=1", rule.ParamGroupID)
params = fmt.Sprintf("?%s=1", paramGroupID)
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 400)
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 400)
})
@@ -173,42 +167,27 @@ func TestHandler(t *testing.T) {
}
})
t.Run("/api/v1/rule?ruleID&groupID", func(t *testing.T) {
expRule := ar.ToAPI()
gotRule := rule.ApiRule{}
expRule := ruleToAPI(ar)
gotRule := apiRule{}
getResp(t, ts.URL+"/"+expRule.APILink(), &gotRule, 200)
if expRule.ID != gotRule.ID {
t.Fatalf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
}
gotRule = rule.ApiRule{}
gotRule = apiRule{}
getResp(t, ts.URL+"/vmalert/"+expRule.APILink(), &gotRule, 200)
if expRule.ID != gotRule.ID {
t.Fatalf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
}
gotRuleWithUpdates := rule.ApiRuleWithUpdates{}
gotRuleWithUpdates := apiRuleWithUpdates{}
getResp(t, ts.URL+"/"+expRule.APILink(), &gotRuleWithUpdates, 200)
if len(gotRuleWithUpdates.StateUpdates) < 1 {
t.Fatalf("expected %+v to have state updates field not empty", gotRuleWithUpdates.StateUpdates)
}
})
t.Run("/api/v1/group?groupID", func(t *testing.T) {
id := groupIDs[0]
g := m.groups[id]
expGroup := g.ToAPI()
gotGroup := rule.ApiGroup{}
getResp(t, ts.URL+"/"+expGroup.APILink(), &gotGroup, 200)
if expGroup.ID != gotGroup.ID {
t.Fatalf("expected to get Group %q; got %q instead", expGroup.ID, gotGroup.ID)
}
gotGroup = rule.ApiGroup{}
getResp(t, ts.URL+"/vmalert/"+expGroup.APILink(), &gotGroup, 200)
if expGroup.ID != gotGroup.ID {
t.Fatalf("expected to get Group %q; got %q instead", expGroup.ID, gotGroup.ID)
}
})
t.Run("/api/v1/rules&filters", func(t *testing.T) {
check := func(url string, statusCode, expGroups, expRules int) {

View File

@@ -1,4 +1,4 @@
package rule
package main
import (
"fmt"
@@ -8,28 +8,79 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
)
const (
// ParamGroupID is group id key in url parameter
ParamGroupID = "group_id"
paramGroupID = "group_id"
// ParamAlertID is alert id key in url parameter
ParamAlertID = "alert_id"
paramAlertID = "alert_id"
// ParamRuleID is rule id key in url parameter
ParamRuleID = "rule_id"
// TypeRecording is a RecordingRule type
TypeRecording = "recording"
// TypeAlerting is an AlertingRule type
TypeAlerting = "alerting"
paramRuleID = "rule_id"
)
// ApiGroup represents a Group for web view
type ApiGroup struct {
type apiNotifier struct {
Kind string `json:"kind"`
Targets []*apiTarget `json:"targets"`
}
type apiTarget struct {
Address string `json:"address"`
Labels map[string]string `json:"labels"`
}
// apiAlert represents a notifier.AlertingRule state
// for WEB view
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
type apiAlert struct {
State string `json:"state"`
Name string `json:"name"`
Value string `json:"value"`
Labels map[string]string `json:"labels,omitempty"`
Annotations map[string]string `json:"annotations"`
ActiveAt time.Time `json:"activeAt"`
// Additional fields
// ID is an unique Alert's ID within a group
ID string `json:"id"`
// RuleID is an unique Rule's ID within a group
RuleID string `json:"rule_id"`
// GroupID is an unique Group's ID
GroupID string `json:"group_id"`
// Expression contains the PromQL/MetricsQL expression
// for Rule's evaluation
Expression string `json:"expression"`
// SourceLink contains a link to a system which should show
// why Alert was generated
SourceLink string `json:"source"`
// Restored shows whether Alert's state was restored on restart
Restored bool `json:"restored"`
// Stabilizing shows when firing state is kept because of
// `keep_firing_for` instead of real alert
Stabilizing bool `json:"stabilizing"`
}
// WebLink returns a link to the alert which can be used in UI.
func (aa *apiAlert) WebLink() string {
return fmt.Sprintf("alert?%s=%s&%s=%s",
paramGroupID, aa.GroupID, paramAlertID, aa.ID)
}
// APILink returns a link to the alert's JSON representation.
func (aa *apiAlert) APILink() string {
return fmt.Sprintf("api/v1/alert?%s=%s&%s=%s",
paramGroupID, aa.GroupID, paramAlertID, aa.ID)
}
// apiGroup represents Group for web view
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
type apiGroup struct {
// Name is the group name as present in the config
Name string `json:"name"`
// Rules contains both recording and alerting rules
Rules []ApiRule `json:"rules"`
Rules []apiRule `json:"rules"`
// Interval is the Group's evaluation interval in float seconds as present in the file.
Interval float64 `json:"interval"`
// LastEvaluation is the timestamp of the last time the Group was executed
@@ -65,20 +116,15 @@ type ApiGroup struct {
NoMatch int
}
// APILink returns a link to the group's JSON representation.
func (ag *ApiGroup) APILink() string {
return fmt.Sprintf("api/v1/group?%s=%s", ParamGroupID, ag.ID)
// groupAlerts represents a group of alerts for WEB view
type groupAlerts struct {
Group *apiGroup
Alerts []*apiAlert
}
// GroupAlerts represents a Group with its Alerts for web view
type GroupAlerts struct {
Group *ApiGroup
Alerts []*ApiAlert
}
// ApiRule represents a Rule for web view
// apiRule represents a Rule for web view
// see https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
type ApiRule struct {
type apiRule struct {
// State must be one of these under following scenarios
// "pending": at least 1 alert in the rule in pending state and no other alert in firing ruleState.
// "firing": at least 1 alert in the rule in firing state.
@@ -100,7 +146,7 @@ type ApiRule struct {
// LastEvaluation is the timestamp of the last time the rule was executed
LastEvaluation time.Time `json:"lastEvaluation"`
// Alerts is the list of all the alerts in this rule that are currently pending or firing
Alerts []*ApiAlert `json:"alerts,omitempty"`
Alerts []*apiAlert `json:"alerts,omitempty"`
// Health is the health of rule evaluation.
// It MUST be one of "ok", "err", "unknown"
Health string `json:"health"`
@@ -131,87 +177,143 @@ type ApiRule struct {
// MaxUpdates is the max number of recorded ruleStateEntry objects
MaxUpdates int `json:"max_updates_entries"`
// Updates contains the ordered list of recorded ruleStateEntry objects
Updates []StateEntry `json:"-"`
Updates []rule.StateEntry `json:"-"`
}
// ApiAlert represents a notifier.AlertingRule state
// for WEB view
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
type ApiAlert struct {
State string `json:"state"`
Name string `json:"name"`
Value string `json:"value"`
Labels map[string]string `json:"labels,omitempty"`
Annotations map[string]string `json:"annotations"`
ActiveAt time.Time `json:"activeAt"`
// Additional fields
// ID is an unique Alert's ID within a group
ID string `json:"id"`
// RuleID is an unique Rule's ID within a group
RuleID string `json:"rule_id"`
// GroupID is an unique Group's ID
GroupID string `json:"group_id"`
// Expression contains the PromQL/MetricsQL expression
// for Rule's evaluation
Expression string `json:"expression"`
// SourceLink contains a link to a system which should show
// why Alert was generated
SourceLink string `json:"source"`
// Restored shows whether Alert's state was restored on restart
Restored bool `json:"restored"`
// Stabilizing shows when firing state is kept because of
// `keep_firing_for` instead of real alert
Stabilizing bool `json:"stabilizing"`
}
// WebLink returns a link to the alert which can be used in UI.
func (aa *ApiAlert) WebLink() string {
return fmt.Sprintf("alert?%s=%s&%s=%s",
ParamGroupID, aa.GroupID, ParamAlertID, aa.ID)
}
// APILink returns a link to the alert's JSON representation.
func (aa *ApiAlert) APILink() string {
return fmt.Sprintf("api/v1/alert?%s=%s&%s=%s",
ParamGroupID, aa.GroupID, ParamAlertID, aa.ID)
}
// ApiRuleWithUpdates represents ApiRule but with extra fields for marshalling
type ApiRuleWithUpdates struct {
ApiRule
// apiRuleWithUpdates represents apiRule but with extra fields for marshalling
type apiRuleWithUpdates struct {
apiRule
// Updates contains the ordered list of recorded ruleStateEntry objects
StateUpdates []StateEntry `json:"updates,omitempty"`
StateUpdates []rule.StateEntry `json:"updates,omitempty"`
}
// APILink returns a link to the rule's JSON representation.
func (ar ApiRule) APILink() string {
func (ar apiRule) APILink() string {
return fmt.Sprintf("api/v1/rule?%s=%s&%s=%s",
ParamGroupID, ar.GroupID, ParamRuleID, ar.ID)
paramGroupID, ar.GroupID, paramRuleID, ar.ID)
}
// WebLink returns a link to the alert which can be used in UI.
func (ar ApiRule) WebLink() string {
func (ar apiRule) WebLink() string {
return fmt.Sprintf("rule?%s=%s&%s=%s",
ParamGroupID, ar.GroupID, ParamRuleID, ar.ID)
paramGroupID, ar.GroupID, paramRuleID, ar.ID)
}
// AlertsToAPI returns list of ApiAlert objects from existing alerts
func (ar *AlertingRule) AlertsToAPI() []*ApiAlert {
var alerts []*ApiAlert
func ruleToAPI(r any) apiRule {
if ar, ok := r.(*rule.AlertingRule); ok {
return alertingToAPI(ar)
}
if rr, ok := r.(*rule.RecordingRule); ok {
return recordingToAPI(rr)
}
return apiRule{}
}
const (
ruleTypeRecording = "recording"
ruleTypeAlerting = "alerting"
)
func recordingToAPI(rr *rule.RecordingRule) apiRule {
lastState := rule.GetLastEntry(rr)
r := apiRule{
Type: ruleTypeRecording,
DatasourceType: rr.Type.String(),
Name: rr.Name,
Query: rr.Expr,
Labels: rr.Labels,
LastEvaluation: lastState.Time,
EvaluationTime: lastState.Duration.Seconds(),
Health: "ok",
LastSamples: lastState.Samples,
LastSeriesFetched: lastState.SeriesFetched,
MaxUpdates: rule.GetRuleStateSize(rr),
Updates: rule.GetAllRuleState(rr),
// encode as strings to avoid rounding
ID: fmt.Sprintf("%d", rr.ID()),
GroupID: fmt.Sprintf("%d", rr.GroupID),
GroupName: rr.GroupName,
File: rr.File,
}
if lastState.Err != nil {
r.LastError = lastState.Err.Error()
r.Health = "err"
}
return r
}
// alertingToAPI returns Rule representation in form of apiRule
func alertingToAPI(ar *rule.AlertingRule) apiRule {
lastState := rule.GetLastEntry(ar)
r := apiRule{
Type: ruleTypeAlerting,
DatasourceType: ar.Type.String(),
Name: ar.Name,
Query: ar.Expr,
Duration: ar.For.Seconds(),
KeepFiringFor: ar.KeepFiringFor.Seconds(),
Labels: ar.Labels,
Annotations: ar.Annotations,
LastEvaluation: lastState.Time,
EvaluationTime: lastState.Duration.Seconds(),
Health: "ok",
State: "inactive",
Alerts: ruleToAPIAlert(ar),
LastSamples: lastState.Samples,
LastSeriesFetched: lastState.SeriesFetched,
MaxUpdates: rule.GetRuleStateSize(ar),
Updates: rule.GetAllRuleState(ar),
Debug: ar.Debug,
// encode as strings to avoid rounding in JSON
ID: fmt.Sprintf("%d", ar.ID()),
GroupID: fmt.Sprintf("%d", ar.GroupID),
GroupName: ar.GroupName,
File: ar.File,
}
if lastState.Err != nil {
r.LastError = lastState.Err.Error()
r.Health = "err"
}
// satisfy apiRule.State logic
if len(r.Alerts) > 0 {
r.State = notifier.StatePending.String()
stateFiring := notifier.StateFiring.String()
for _, a := range r.Alerts {
if a.State == stateFiring {
r.State = stateFiring
break
}
}
}
return r
}
// ruleToAPIAlert generates list of apiAlert objects from existing alerts
func ruleToAPIAlert(ar *rule.AlertingRule) []*apiAlert {
var alerts []*apiAlert
for _, a := range ar.GetAlerts() {
if a.State == notifier.StateInactive {
continue
}
alerts = append(alerts, NewAlertAPI(ar, a))
alerts = append(alerts, newAlertAPI(ar, a))
}
return alerts
}
// alertToAPI generates apiAlert object from alert by its id(hash)
func alertToAPI(ar *rule.AlertingRule, id uint64) *apiAlert {
a := ar.GetAlert(id)
if a == nil {
return nil
}
return newAlertAPI(ar, a)
}
// NewAlertAPI creates apiAlert for notifier.Alert
func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
aa := &ApiAlert{
func newAlertAPI(ar *rule.AlertingRule, a *notifier.Alert) *apiAlert {
aa := &apiAlert{
// encode as strings to avoid rounding
ID: fmt.Sprintf("%d", a.ID),
GroupID: fmt.Sprintf("%d", a.GroupID),
@@ -226,8 +328,8 @@ func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
Restored: a.Restored,
Value: strconv.FormatFloat(a.Value, 'f', -1, 32),
}
if notifier.AlertURLGeneratorFn != nil {
aa.SourceLink = notifier.AlertURLGeneratorFn(*a)
if alertURLGeneratorFn != nil {
aa.SourceLink = alertURLGeneratorFn(*a)
}
if a.State == notifier.StateFiring && !a.KeepFiringSince.IsZero() {
aa.Stabilizing = true
@@ -235,11 +337,9 @@ func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
return aa
}
// ToAPI returns ApiGroup representation of g
func (g *Group) ToAPI() *ApiGroup {
g.mu.RLock()
defer g.mu.RUnlock()
ag := ApiGroup{
func groupToAPI(g *rule.Group) *apiGroup {
g = g.DeepCopy()
ag := apiGroup{
// encode as string to avoid rounding
ID: strconv.FormatUint(g.GetID(), 10),
Name: g.Name,
@@ -259,9 +359,9 @@ func (g *Group) ToAPI() *ApiGroup {
if g.EvalDelay != nil {
ag.EvalDelay = g.EvalDelay.Seconds()
}
ag.Rules = make([]ApiRule, 0)
ag.Rules = make([]apiRule, 0)
for _, r := range g.Rules {
ag.Rules = append(ag.Rules, r.ToAPI())
ag.Rules = append(ag.Rules, ruleToAPI(r))
}
return &ag
}

View File

@@ -1,4 +1,4 @@
package rule
package main
import (
"fmt"
@@ -8,6 +8,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
)
func TestRecordingToApi(t *testing.T) {
@@ -16,7 +17,7 @@ func TestRecordingToApi(t *testing.T) {
Values: []float64{1}, Timestamps: []int64{0},
})
entriesLimit := 44
g := NewGroup(config.Group{
g := rule.NewGroup(config.Group{
Name: "group",
File: "rules.yaml",
Concurrency: 1,
@@ -30,24 +31,24 @@ func TestRecordingToApi(t *testing.T) {
},
},
}, fq, 1*time.Minute, nil)
rr := g.Rules[0].(*RecordingRule)
rr := g.Rules[0].(*rule.RecordingRule)
expectedRes := ApiRule{
expectedRes := apiRule{
Name: "record_name",
Query: "up",
Labels: map[string]string{"label": "value"},
Health: "ok",
Type: TypeRecording,
Type: ruleTypeRecording,
DatasourceType: "prometheus",
ID: "1248",
GroupID: fmt.Sprintf("%d", g.CreateID()),
GroupName: "group",
File: "rules.yaml",
MaxUpdates: 44,
Updates: make([]StateEntry, 0),
Updates: make([]rule.StateEntry, 0),
}
res := rr.ToAPI()
res := recordingToAPI(rr)
if !reflect.DeepEqual(res, expectedRes) {
t.Fatalf("expected to have: \n%v;\ngot: \n%v", expectedRes, res)

View File

@@ -27,9 +27,6 @@ vmauth-linux-ppc64le-prod:
vmauth-linux-386-prod:
APP_NAME=vmauth $(MAKE) app-via-docker-linux-386
vmauth-linux-s390x-prod:
APP_NAME=vmauth $(MAKE) app-via-docker-linux-s390x
vmauth-darwin-amd64-prod:
APP_NAME=vmauth $(MAKE) app-via-docker-darwin-amd64

View File

@@ -41,9 +41,6 @@ var (
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing for details")
defaultLoadBalancingPolicy = flag.String("loadBalancingPolicy", "least_loaded", "The default load balancing policy to use for backend urls specified inside url_prefix section. "+
"Supported policies: least_loaded, first_available. See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing")
defaultMergeQueryArgs = flagutil.NewArrayString("mergeQueryArgs", "An optional list of client query arg names, which must be merged with args at backend urls. "+
"The rest of client query args are replaced by the corresponding query args from backend urls for security reasons; "+
"see https://docs.victoriametrics.com/victoriametrics/vmauth/#query-args-handling")
discoverBackendIPsGlobal = flag.Bool("discoverBackendIPs", false, "Whether to discover backend IPs via periodic DNS queries to hostnames specified in url_prefix. "+
"This may be useful when url_prefix points to a hostname with dynamically scaled instances behind it. See https://docs.victoriametrics.com/victoriametrics/vmauth/#discovering-backend-ips")
discoverBackendIPsInterval = flag.Duration("discoverBackendIPsInterval", 10*time.Second, "The interval for re-discovering backend IPs if -discoverBackendIPs command-line flag is set. "+
@@ -78,7 +75,6 @@ type UserInfo struct {
DefaultURL *URLPrefix `yaml:"default_url,omitempty"`
RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"`
LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"`
MergeQueryArgs []string `yaml:"merge_query_args,omitempty"`
DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"`
TLSCAFile string `yaml:"tls_ca_file,omitempty"`
TLSCertFile string `yaml:"tls_cert_file,omitempty"`
@@ -127,22 +123,6 @@ func (ui *UserInfo) getMaxConcurrentRequests() int {
return mcr
}
func (ui *UserInfo) stopHealthChecks() {
if ui == nil {
return
}
if ui.URLPrefix == nil {
return
}
pbus := ui.URLPrefix.bus.Load()
if *pbus != nil {
for _, bu := range *pbus {
bu.stopHealthCheck()
}
}
}
// Header is `Name: Value` http header, which must be added to the proxied request.
type Header struct {
Name string
@@ -202,11 +182,6 @@ type URLMap struct {
// LoadBalancingPolicy is load balancing policy among UrlPrefix backends.
LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"`
// MergeQueryArgs is a list of client query args, which must be merged with the existing backend query args.
//
// The rest of client query args are replaced with the corresponding backend query args for security reasons.
MergeQueryArgs []string `yaml:"merge_query_args,omitempty"`
// DropSrcPathPrefixParts is the number of `/`-delimited request path prefix parts to drop before proxying the request to backend.
DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"`
}
@@ -253,7 +228,7 @@ func (qa *QueryArg) MarshalYAML() (any, error) {
return qa.sOriginal, nil
}
// URLPrefix represents the `url_prefix` from auth config.
// URLPrefix represents passed `url_prefix`
type URLPrefix struct {
// requests are re-tried on other backend urls for these http response status codes
retryStatusCodes []int
@@ -261,11 +236,6 @@ type URLPrefix struct {
// load balancing policy used
loadBalancingPolicy string
// the list of client query args, which must be merged with backend query args.
//
// By default backend query args replace all the client query args for security reasons.
mergeQueryArgs []string
// how many request path prefix parts to drop before routing the request to backendURL
dropSrcPathPrefixParts int
@@ -303,69 +273,20 @@ func (up *URLPrefix) setLoadBalancingPolicy(loadBalancingPolicy string) error {
}
type backendURL struct {
broken atomic.Bool
stopHealthCheckCh chan struct{}
stopHealthCheckOnce sync.Once
brokenDeadline atomic.Uint64
concurrentRequests atomic.Int32
url *url.URL
}
func (bu *backendURL) isBroken() bool {
return bu.broken.Load()
ct := fasttime.UnixTimestamp()
return ct < bu.brokenDeadline.Load()
}
func (bu *backendURL) setBroken() {
if !bu.broken.Load() && bu.broken.CompareAndSwap(false, true) {
bu.startHealthCheck()
}
}
func (bu *backendURL) startHealthCheck() {
go func() {
port := bu.url.Port()
if port == "" {
port = "80"
}
addr := net.JoinHostPort(bu.url.Hostname(), port)
t := time.NewTimer(*failTimeout)
for {
select {
case <-t.C:
// Do not perform tcp probe for https urls as
// - the network unavailability is less probable for https urls
// - it will pollute logs on server side with SSL handshake errors.
if bu.url.Scheme == "https" {
bu.broken.Store(false)
return
}
// Verify network connectivity via TCP dial before marking backend healthy.
// Previously, backends were auto-restored after failTimeout without validation,
// causing requests to repeatedly hang on unreachable backends.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9890
c, err := net.DialTimeout(`tcp`, addr, time.Second)
if err != nil {
t.Reset(*failTimeout)
continue
}
_ = c.Close()
bu.broken.Store(false)
return
case <-bu.stopHealthCheckCh:
t.Stop()
return
}
}
}()
}
func (bu *backendURL) stopHealthCheck() {
bu.stopHealthCheckOnce.Do(func() {
close(bu.stopHealthCheckCh)
})
deadline := fasttime.UnixTimestamp() + uint64((*failTimeout).Seconds())
bu.brokenDeadline.Store(deadline)
}
func (bu *backendURL) get() {
@@ -479,8 +400,7 @@ func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
buCopy := *bu
buCopy.Host = addr
busNew = append(busNew, &backendURL{
url: &buCopy,
stopHealthCheckCh: make(chan struct{}),
url: &buCopy,
})
}
}
@@ -492,12 +412,6 @@ func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
// Store new backend urls
up.bus.Store(&busNew)
if *pbus != nil {
for _, bu := range *pbus {
bu.stopHealthCheck()
}
}
}
func areEqualBackendURLs(a, b []*backendURL) bool {
@@ -554,34 +468,27 @@ func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *
if bu.isBroken() {
continue
}
// The Load() in front of CompareAndSwap() avoids CAS overhead for items with values bigger than 0.
if bu.concurrentRequests.Load() == 0 && bu.concurrentRequests.CompareAndSwap(0, 1) {
atomicCounter.CompareAndSwap(n+1, idx+1)
// There is no need in the call bu.get(), because we already incremented bu.concrrentRequests above.
if bu.concurrentRequests.Load() == 0 {
// Fast path - return the backend with zero concurrently executed requests.
// Do not use CompareAndSwap() instead of Load(), since it is much slower on systems with many CPU cores.
bu.concurrentRequests.Add(1)
return bu
}
}
// Slow path - return the backend with the minimum number of concurrently executed requests.
buMinIdx := n % uint32(len(bus))
minRequests := bus[buMinIdx].concurrentRequests.Load()
for i := uint32(0); i < uint32(len(bus)); i++ {
idx := (n + i) % uint32(len(bus))
bu := bus[idx]
buMin := bus[n%uint32(len(bus))]
minRequests := buMin.concurrentRequests.Load()
for _, bu := range bus {
if bu.isBroken() {
continue
}
reqs := bu.concurrentRequests.Load()
if reqs < minRequests || bus[buMinIdx].isBroken() {
buMinIdx = idx
minRequests = reqs
if n := bu.concurrentRequests.Load(); n < minRequests || buMin.isBroken() {
buMin = bu
minRequests = n
}
}
buMin := bus[buMinIdx]
buMin.get()
atomicCounter.CompareAndSwap(n+1, buMinIdx+1)
return buMin
}
@@ -804,11 +711,6 @@ func reloadAuthConfigData(data []byte) (bool, error) {
acPrev := authConfig.Load()
if acPrev != nil {
acPrev.UnauthorizedUser.stopHealthChecks()
for i := range acPrev.Users {
acPrev.Users[i].stopHealthChecks()
}
metrics.UnregisterSet(acPrev.ms, true)
}
metrics.RegisterSet(ac.ms)
@@ -954,7 +856,6 @@ func (ui *UserInfo) getMetricLabels() (string, error) {
func (ui *UserInfo) initURLs() error {
retryStatusCodes := defaultRetryStatusCodes.Values()
loadBalancingPolicy := *defaultLoadBalancingPolicy
mergeQueryArgs := *defaultMergeQueryArgs
dropSrcPathPrefixParts := 0
discoverBackendIPs := *discoverBackendIPsGlobal
if ui.RetryStatusCodes != nil {
@@ -963,9 +864,6 @@ func (ui *UserInfo) initURLs() error {
if ui.LoadBalancingPolicy != "" {
loadBalancingPolicy = ui.LoadBalancingPolicy
}
if len(ui.MergeQueryArgs) != 0 {
mergeQueryArgs = ui.MergeQueryArgs
}
if ui.DropSrcPathPrefixParts != nil {
dropSrcPathPrefixParts = *ui.DropSrcPathPrefixParts
}
@@ -973,18 +871,16 @@ func (ui *UserInfo) initURLs() error {
discoverBackendIPs = *ui.DiscoverBackendIPs
}
up := ui.URLPrefix
if up != nil {
if err := up.sanitizeAndInitialize(); err != nil {
if ui.URLPrefix != nil {
if err := ui.URLPrefix.sanitizeAndInitialize(); err != nil {
return err
}
up.retryStatusCodes = retryStatusCodes
up.dropSrcPathPrefixParts = dropSrcPathPrefixParts
up.discoverBackendIPs = discoverBackendIPs
if err := up.setLoadBalancingPolicy(loadBalancingPolicy); err != nil {
ui.URLPrefix.retryStatusCodes = retryStatusCodes
ui.URLPrefix.dropSrcPathPrefixParts = dropSrcPathPrefixParts
ui.URLPrefix.discoverBackendIPs = discoverBackendIPs
if err := ui.URLPrefix.setLoadBalancingPolicy(loadBalancingPolicy); err != nil {
return err
}
up.mergeQueryArgs = mergeQueryArgs
}
if ui.DefaultURL != nil {
if err := ui.DefaultURL.sanitizeAndInitialize(); err != nil {
@@ -1003,7 +899,6 @@ func (ui *UserInfo) initURLs() error {
}
rscs := retryStatusCodes
lbp := loadBalancingPolicy
mqa := mergeQueryArgs
dsp := dropSrcPathPrefixParts
dbd := discoverBackendIPs
if e.RetryStatusCodes != nil {
@@ -1012,9 +907,6 @@ func (ui *UserInfo) initURLs() error {
if e.LoadBalancingPolicy != "" {
lbp = e.LoadBalancingPolicy
}
if len(e.MergeQueryArgs) != 0 {
mqa = e.MergeQueryArgs
}
if e.DropSrcPathPrefixParts != nil {
dsp = *e.DropSrcPathPrefixParts
}
@@ -1025,7 +917,6 @@ func (ui *UserInfo) initURLs() error {
if err := e.URLPrefix.setLoadBalancingPolicy(lbp); err != nil {
return err
}
e.URLPrefix.mergeQueryArgs = mqa
e.URLPrefix.dropSrcPathPrefixParts = dsp
e.URLPrefix.discoverBackendIPs = dbd
}
@@ -1140,8 +1031,7 @@ func (up *URLPrefix) sanitizeAndInitialize() error {
bus := make([]*backendURL, len(up.busOriginal))
for i, bu := range up.busOriginal {
bus[i] = &backendURL{
url: bu,
stopHealthCheckCh: make(chan struct{}),
url: bu,
}
}
up.bus.Store(&bus)

View File

@@ -280,7 +280,7 @@ users:
}
func TestParseAuthConfigSuccess(t *testing.T) {
f := func(s string, expectedAuthConfig map[string]*UserInfo, expectedUnauthorizedUserConfig *UserInfo) {
f := func(s string, expectedAuthConfig map[string]*UserInfo) {
t.Helper()
ac, err := parseAuthConfig([]byte(s))
if err != nil {
@@ -294,19 +294,15 @@ func TestParseAuthConfigSuccess(t *testing.T) {
if err := areEqualConfigs(m, expectedAuthConfig); err != nil {
t.Fatal(err)
}
if err := areEqualConfigs(ac.UnauthorizedUser, expectedUnauthorizedUserConfig); err != nil {
t.Fatal(err)
}
}
insecureSkipVerifyTrue := true
// Empty config
f(``, map[string]*UserInfo{}, nil)
f(``, map[string]*UserInfo{})
// Empty users
f(`users: []`, map[string]*UserInfo{}, nil)
f(`users: []`, map[string]*UserInfo{})
// Single user
f(`
@@ -324,7 +320,7 @@ users:
MaxConcurrentRequests: 5,
TLSInsecureSkipVerify: &insecureSkipVerifyTrue,
},
}, nil)
})
// Single user with auth_token
f(`
@@ -348,7 +344,7 @@ users:
TLSCertFile: "foo/baz",
TLSKeyFile: "foo/foo",
},
}, nil)
})
// Multiple url_prefix entries
insecureSkipVerifyFalse := false
@@ -363,7 +359,6 @@ users:
tls_insecure_skip_verify: false
retry_status_codes: [500, 501]
load_balancing_policy: first_available
merge_query_args: [foo, bar]
drop_src_path_prefix_parts: 1
discover_backend_ips: true
`, map[string]*UserInfo{
@@ -377,11 +372,10 @@ users:
TLSInsecureSkipVerify: &insecureSkipVerifyFalse,
RetryStatusCodes: []int{500, 501},
LoadBalancingPolicy: "first_available",
MergeQueryArgs: []string{"foo", "bar"},
DropSrcPathPrefixParts: intp(1),
DiscoverBackendIPs: &discoverBackendIPsTrue,
},
}, nil)
})
// Multiple users
f(`
@@ -399,7 +393,7 @@ users:
Username: "bar",
URLPrefix: mustParseURL("https://bar/x/"),
},
}, nil)
})
// non-empty URLMap
sharedUserInfo := &UserInfo{
@@ -449,7 +443,7 @@ users:
`, map[string]*UserInfo{
getHTTPAuthBearerToken("foo"): sharedUserInfo,
getHTTPAuthBasicToken("foo", ""): sharedUserInfo,
}, nil)
})
// Multiple users with the same name - this should work, since these users have different passwords
f(`
@@ -471,7 +465,7 @@ users:
Password: "bar",
URLPrefix: mustParseURL("https://bar/x"),
},
}, nil)
})
// with default url
keepOriginalHost := true
@@ -487,8 +481,6 @@ users:
- "foo: bar"
- "xxx: y"
keep_original_host: true
load_balancing_policy: first_available
merge_query_args: [foo, bar]
default_url:
- http://default1/select/0/prometheus
- http://default2/select/0/prometheus
@@ -513,8 +505,6 @@ users:
},
KeepOriginalHost: &keepOriginalHost,
},
LoadBalancingPolicy: "first_available",
MergeQueryArgs: []string{"foo", "bar"},
},
},
DefaultURL: mustParseURLs([]string{
@@ -542,8 +532,6 @@ users:
},
KeepOriginalHost: &keepOriginalHost,
},
LoadBalancingPolicy: "first_available",
MergeQueryArgs: []string{"foo", "bar"},
},
},
DefaultURL: mustParseURLs([]string{
@@ -551,7 +539,7 @@ users:
"http://default2/select/0/prometheus",
}),
},
}, nil)
})
// With metric_labels
f(`
@@ -603,23 +591,6 @@ users:
},
},
},
}, nil)
// unauthorized_user
f(`
unauthorized_user:
merge_query_args: [extra_filters]
url_map:
- src_paths: ["/select/.+"]
url_prefix: 'http://victoria-logs:9428/?extra_filters={env="prod"}'
`, nil, &UserInfo{
MergeQueryArgs: []string{"extra_filters"},
URLMaps: []URLMap{
{
SrcPaths: getRegexs([]string{"/select/.+"}),
URLPrefix: mustParseURL(`http://victoria-logs:9428/?extra_filters={env="prod"}`),
},
},
})
}
@@ -752,12 +723,10 @@ func TestGetLeastLoadedBackendURL(t *testing.T) {
})
up.loadBalancingPolicy = "least_loaded"
pbus := up.bus.Load()
bus := *pbus
fn := func(ns ...int) {
t.Helper()
pbus := up.bus.Load()
bus := *pbus
for i, b := range bus {
got := int(b.concurrentRequests.Load())
exp := ns[i]
@@ -769,52 +738,45 @@ func TestGetLeastLoadedBackendURL(t *testing.T) {
up.getBackendURL()
fn(1, 0, 0)
up.getBackendURL()
fn(1, 1, 0)
up.getBackendURL()
fn(1, 1, 1)
bus[1].put()
bus[2].put()
fn(1, 0, 0)
up.getBackendURL()
up.getBackendURL()
fn(2, 2, 1)
bus := up.bus.Load()
pbus := *bus
pbus[0].concurrentRequests.Add(2)
pbus[2].concurrentRequests.Add(5)
fn(4, 2, 6)
up.getBackendURL()
fn(1, 1, 0)
fn(4, 3, 6)
bus[1].put()
up.getBackendURL()
fn(1, 0, 1)
fn(4, 4, 6)
up.getBackendURL()
fn(4, 5, 6)
up.getBackendURL()
fn(5, 5, 6)
up.getBackendURL()
fn(6, 5, 6)
up.getBackendURL()
fn(6, 6, 6)
up.getBackendURL()
fn(6, 6, 7)
up.getBackendURL()
up.getBackendURL()
fn(1, 1, 2)
bus[0].concurrentRequests.Add(2)
bus[2].concurrentRequests.Add(2)
fn(3, 1, 4)
up.getBackendURL()
fn(3, 2, 4)
up.getBackendURL()
fn(3, 3, 4)
up.getBackendURL()
fn(4, 3, 4)
up.getBackendURL()
fn(4, 4, 4)
bus[0].put()
bus[2].put()
up.getBackendURL()
fn(3, 4, 4)
up.getBackendURL()
fn(4, 4, 4)
fn(7, 7, 7)
}
func TestBrokenBackend(t *testing.T) {
@@ -922,7 +884,7 @@ func removeMetrics(m map[string]*UserInfo) {
}
}
func areEqualConfigs(a, b any) error {
func areEqualConfigs(a, b map[string]*UserInfo) error {
aData, err := yaml.Marshal(a)
if err != nil {
return fmt.Errorf("cannot marshal a: %w", err)
@@ -950,8 +912,7 @@ func mustParseURLs(us []string) *URLPrefix {
panic(fmt.Errorf("BUG: cannot parse %q: %w", u, err))
}
bus[i] = &backendURL{
url: pu,
stopHealthCheckCh: make(chan struct{}),
url: pu,
}
urls[i] = pu
}

View File

@@ -269,7 +269,7 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
query.Set("request_path", u.String())
targetURL.RawQuery = query.Encode()
} else { // Update path for regular routes.
targetURL = mergeURLs(targetURL, u, up.dropSrcPathPrefixParts, up.mergeQueryArgs)
targetURL = mergeURLs(targetURL, u, up.dropSrcPathPrefixParts)
}
wasLocalRetry := false
@@ -310,21 +310,14 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
rtb, rtbOK := req.Body.(*readTrackingBody)
res, err := ui.rt.RoundTrip(req)
if ctxErr := r.Context().Err(); ctxErr != nil {
// Override the error returned by the RoundTrip with the context error if it isn't non-nil
// This makes sure the proper logging for canceled and timed out requests - log the real cause of the error
// instead of the random error, which could be returned from RoundTrip because of canceled or timed out request.
err = ctxErr
}
if err != nil {
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
// Do not retry canceled or timed out requests
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
requestURI := httpserver.GetRequestURI(r)
logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying response body from %s: %s", remoteAddr, requestURI, targetURL, err)
if errors.Is(err, context.DeadlineExceeded) {
// Timed out request must be counted as errors, since this usually means that the backend is slow.
logger.Warnf("remoteAddr: %s; requestURI: %s; timeout while proxying the response from %s: %s", remoteAddr, requestURI, targetURL, err)
ui.backendErrors.Inc()
}
return false, false
@@ -379,54 +372,20 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
updateHeadersByConfig(w.Header(), hc.ResponseHeaders)
w.WriteHeader(res.StatusCode)
err = copyStreamToClient(w, res.Body)
copyBuf := copyBufPool.Get()
copyBuf.B = bytesutil.ResizeNoCopyNoOverallocate(copyBuf.B, 16*1024)
_, err = io.CopyBuffer(w, res.Body, copyBuf.B)
copyBufPool.Put(copyBuf)
_ = res.Body.Close()
if err != nil && !netutil.IsTrivialNetworkError(err) && !errors.Is(err, context.Canceled) {
if err != nil && !netutil.IsTrivialNetworkError(err) {
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
requestURI := httpserver.GetRequestURI(r)
logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying response body from %s: %s", remoteAddr, requestURI, targetURL, err)
return true, false
}
return true, false
}
func copyStreamToClient(client io.Writer, backend io.Reader) error {
copyBuf := copyBufPool.Get()
copyBuf.B = bytesutil.ResizeNoCopyNoOverallocate(copyBuf.B, 16*1024)
defer copyBufPool.Put(copyBuf)
buf := copyBuf.B
flusher, ok := client.(http.Flusher)
if !ok {
logger.Panicf("BUG: client must implement net/http.Flusher interface; got %T", client)
}
for {
n, backendErr := backend.Read(buf)
if n > 0 {
data := buf[:n]
n, clientErr := client.Write(data)
if clientErr != nil {
return fmt.Errorf("cannot write data to client: %w", clientErr)
}
if n != len(data) {
logger.Panicf("BUG: unexpected number of bytes written returned by client.Write; got %d; want %d", n, len(data))
}
// Flush the read data from the backend to the client as fast as possible
// in order to reduce delays for data propagation.
// See https://github.com/VictoriaMetrics/VictoriaLogs/issues/667
flusher.Flush()
}
if backendErr != nil {
if backendErr == io.EOF {
return nil
}
return fmt.Errorf("cannot read data from backend: %w", backendErr)
}
}
}
var copyBufPool bytesutil.ByteBufferPool
func copyHeader(dst, src http.Header) {

View File

@@ -514,11 +514,6 @@ func (w *fakeResponseWriter) getResponse() string {
return w.bb.String()
}
// Flush implements net/http.Flusher
func (w *fakeResponseWriter) Flush() {
// Nothing to do.
}
func (w *fakeResponseWriter) Header() http.Header {
if w.h == nil {
w.h = http.Header{}

View File

@@ -8,42 +8,29 @@ import (
"strings"
)
func mergeURLs(uiURL, requestURI *url.URL, dropSrcPathPrefixParts int, mergeQueryArgs []string) *url.URL {
func mergeURLs(uiURL, requestURI *url.URL, dropSrcPathPrefixParts int) *url.URL {
targetURL := *uiURL
srcPath := dropPrefixParts(requestURI.Path, dropSrcPathPrefixParts)
if strings.HasPrefix(srcPath, "/") {
targetURL.Path = strings.TrimSuffix(targetURL.Path, "/")
}
targetURL.Path += srcPath
requestParams := requestURI.Query()
// fast path
if len(requestParams) == 0 {
return &targetURL
}
// Merge client query args with backend query args
targetParams := targetURL.Query()
uiParams := url.Values{}
// Copy all the target query args
for k, v := range targetParams {
for i := range v {
uiParams.Add(k, v[i])
}
}
// Copy the client query args if they do not clash with target args.
// merge query parameters from requests.
uiParams := targetURL.Query()
for k, v := range requestParams {
if targetParams.Has(k) && !slices.Contains(mergeQueryArgs, k) {
// Skip clashed client query params for security reasons
// skip clashed query params from original request
if exist := uiParams.Get(k); len(exist) > 0 {
continue
}
for i := range v {
uiParams.Add(k, v[i])
}
}
targetURL.RawQuery = uiParams.Encode()
return &targetURL
}

View File

@@ -101,7 +101,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
return
}
bu := up.getBackendURL()
target := mergeURLs(bu.url, u, up.dropSrcPathPrefixParts, up.mergeQueryArgs)
target := mergeURLs(bu.url, u, up.dropSrcPathPrefixParts)
bu.put()
gotTarget := target.String()
@@ -352,7 +352,7 @@ func TestUserInfoGetBackendURL_SRV(t *testing.T) {
return
}
bu := up.getBackendURL()
target := mergeURLs(bu.url, u, up.dropSrcPathPrefixParts, up.mergeQueryArgs)
target := mergeURLs(bu.url, u, up.dropSrcPathPrefixParts)
bu.put()
gotTarget := target.String()
@@ -528,43 +528,3 @@ func (r *fakeResolver) LookupIPAddr(_ context.Context, host string) ([]net.IPAdd
func (r *fakeResolver) LookupMX(_ context.Context, _ string) ([]*net.MX, error) {
return nil, nil
}
func TestMergeURLs(t *testing.T) {
f := func(clientURL, backendURL string, dropSrcPathPrefixParts int, mergeQueryArgs []string, resultURLExpected string) {
t.Helper()
cu, err := url.Parse(clientURL)
if err != nil {
t.Fatalf("cannot parse client url %q: %s", clientURL, err)
}
cu = normalizeURL(cu)
bu, err := url.Parse(backendURL)
if err != nil {
t.Fatalf("cannot parse backend url %q: %s", backendURL, err)
}
ru := mergeURLs(bu, cu, dropSrcPathPrefixParts, mergeQueryArgs)
resultURL := ru.String()
if resultURL != resultURLExpected {
t.Fatalf("unexpected resultURL\ngot\n%s\nwant\n%s", resultURL, resultURLExpected)
}
}
f("http://foo:1234", "https://backend/foo/bar?baz=abc&de", 0, nil, "https://backend/foo/bar?baz=abc&de")
f("http://foo:1234", "https://backend/foo/bar/?baz=abc&de", 0, nil, "https://backend/foo/bar/?baz=abc&de")
f("https://foo:1234/", "https://backend/foo/bar?baz=abc&de", 0, nil, "https://backend/foo/bar?baz=abc&de")
f("https://foo:1234/", "http://backend:8888/foo/bar/?baz=abc&de", 0, nil, "http://backend:8888/foo/bar/?baz=abc&de")
// merge paths
f("http://foo:1234/x/y?z=xxx", "https://backend/foo/bar?baz=abc&de", 0, nil, "https://backend/foo/bar/x/y?baz=abc&de=&z=xxx")
// "hacky" url
f("http://foo:1234/../../x/../y?z=xxx", "https://backend/foo/bar?baz=abc&de", 0, nil, "https://backend/foo/bar/y?baz=abc&de=&z=xxx")
// make sure that the client args are overridden by server args by default
f("http://foo:1234/x/y?password=hack&qqq=www", "https://backend/foo/bar?password=abc", 0, nil, "https://backend/foo/bar/x/y?password=abc&qqq=www")
// allow overriding the selected query args
f("http://foo:1234/x/y?baz=xxx&qqq=www", "https://backend/foo/bar?baz=abc", 0, []string{"baz"}, "https://backend/foo/bar/x/y?baz=abc&baz=xxx&qqq=www")
}

View File

@@ -31,9 +31,6 @@ vmbackup-linux-ppc64le-prod:
vmbackup-linux-386-prod:
APP_NAME=vmbackup EXTRA_GO_BUILD_TAGS=$(VMBACKUP_GO_BUILD_TAGS) $(MAKE) app-via-docker-linux-386
vmbackup-linux-s390x-prod:
APP_NAME=vmbackup EXTRA_GO_BUILD_TAGS=$(VMBACKUP_GO_BUILD_TAGS) $(MAKE) app-via-docker-linux-s390x
vmbackup-darwin-amd64-prod:
APP_NAME=vmbackup EXTRA_GO_BUILD_TAGS=$(VMBACKUP_GO_BUILD_TAGS) $(MAKE) app-via-docker-darwin-amd64

View File

@@ -115,7 +115,7 @@ func main() {
if err != nil {
logger.Fatalf("cannot create backup: %s", err)
}
pushmetrics.StopAndPush()
pushmetrics.Stop()
startTime := time.Now()
logger.Infof("gracefully shutting down http server for metrics at %q", listenAddrs)
@@ -212,7 +212,7 @@ func newSrcFS() (*fslocal.FS, error) {
}
func newDstFS(ctx context.Context) (common.RemoteFS, error) {
fs, err := actions.NewRemoteFS(ctx, *dst, nil)
fs, err := actions.NewRemoteFS(ctx, *dst)
if err != nil {
return nil, fmt.Errorf("cannot parse `-dst`=%q: %w", *dst, err)
}
@@ -255,7 +255,7 @@ func newOriginFS(ctx context.Context) (common.OriginFS, error) {
if len(*origin) == 0 {
return &fsnil.FS{}, nil
}
fs, err := actions.NewRemoteFS(ctx, *origin, nil)
fs, err := actions.NewRemoteFS(ctx, *origin)
if err != nil {
return nil, fmt.Errorf("cannot parse `-origin`=%q: %w", *origin, err)
}
@@ -266,7 +266,7 @@ func newRemoteOriginFS(ctx context.Context) (common.RemoteFS, error) {
if len(*origin) == 0 {
return nil, fmt.Errorf("-origin cannot be empty when -snapshotName and -snapshot.createURL aren't set")
}
fs, err := actions.NewRemoteFS(ctx, *origin, nil)
fs, err := actions.NewRemoteFS(ctx, *origin)
if err != nil {
return nil, fmt.Errorf("cannot parse `-origin`=%q: %w", *origin, err)
}

View File

@@ -27,9 +27,6 @@ vmctl-linux-ppc64le-prod:
vmctl-linux-386-prod:
APP_NAME=vmctl $(MAKE) app-via-docker-linux-386
vmctl-linux-s390x-prod:
APP_NAME=vmctl $(MAKE) app-via-docker-linux-s390x
vmctl-darwin-amd64-prod:
APP_NAME=vmctl $(MAKE) app-via-docker-darwin-amd64

View File

@@ -689,15 +689,15 @@ var (
Usage: "The time filter in RFC3339 format to select timeseries with timestamp equal or lower than provided value. E.g. '2020-01-01T20:07:00Z'",
Layout: time.RFC3339,
},
&cli.StringSliceFlag{
Name: remoteReadFilterLabel,
Usage: "Prometheus label name to filter timeseries by. E.g. '__name__' will filter timeseries by name.",
DefaultText: "__name__",
&cli.StringFlag{
Name: remoteReadFilterLabel,
Usage: "Prometheus label name to filter timeseries by. E.g. '__name__' will filter timeseries by name.",
Value: "__name__",
},
&cli.StringSliceFlag{
Name: remoteReadFilterLabelValue,
Usage: fmt.Sprintf("Prometheus regular expression to filter label from %q flag.", remoteReadFilterLabelValue),
DefaultText: ".*",
&cli.StringFlag{
Name: remoteReadFilterLabelValue,
Usage: fmt.Sprintf("Prometheus regular expression to filter label from %q flag.", remoteReadFilterLabelValue),
Value: ".*",
},
&cli.BoolFlag{
Name: remoteRead,

View File

@@ -1,7 +1,6 @@
package main
import (
"context"
"fmt"
"io"
"log"
@@ -38,7 +37,7 @@ func newInfluxProcessor(ic *influx.Client, im *vm.Importer, cc int, separator st
}
}
func (ip *influxProcessor) run(ctx context.Context) error {
func (ip *influxProcessor) run() error {
series, err := ip.ic.Explore()
if err != nil {
return fmt.Errorf("explore query failed: %s", err)
@@ -48,7 +47,7 @@ func (ip *influxProcessor) run(ctx context.Context) error {
}
question := fmt.Sprintf("Found %d timeseries to import. Continue?", len(series))
if !prompt(ctx, question) {
if !prompt(question) {
return nil
}

View File

@@ -103,7 +103,7 @@ func main() {
}
otsdbProcessor := newOtsdbProcessor(otsdbClient, importer, c.Int(otsdbConcurrency), c.Bool(globalVerbose))
return otsdbProcessor.run(ctx)
return otsdbProcessor.run()
},
},
{
@@ -164,7 +164,7 @@ func main() {
c.Bool(influxSkipDatabaseLabel),
c.Bool(influxPrometheusMode),
c.Bool(globalVerbose))
return processor.run(ctx)
return processor.run()
},
},
{
@@ -192,14 +192,6 @@ func main() {
return fmt.Errorf("failed to create transport for -%s=%q: %s", remoteReadSrcAddr, addr, err)
}
// Backwards compatible default values if none provided by user
rrLabelNames := c.StringSlice(remoteReadFilterLabel)
rrLabelValues := c.StringSlice(remoteReadFilterLabelValue)
if len(rrLabelNames) == 0 && len(rrLabelValues) == 0 {
rrLabelNames = []string{"__name__"}
rrLabelValues = []string{".*"}
}
rr, err := remoteread.NewClient(remoteread.Config{
Addr: addr,
Transport: tr,
@@ -208,8 +200,8 @@ func main() {
Timeout: c.Duration(remoteReadHTTPTimeout),
UseStream: c.Bool(remoteReadUseStream),
Headers: c.String(remoteReadHeaders),
LabelNames: rrLabelNames,
LabelValues: rrLabelValues,
LabelName: c.String(remoteReadFilterLabel),
LabelValue: c.String(remoteReadFilterLabelValue),
DisablePathAppend: c.Bool(remoteReadDisablePathAppend),
})
if err != nil {
@@ -279,7 +271,7 @@ func main() {
cc: c.Int(promConcurrency),
isVerbose: c.Bool(globalVerbose),
}
return pp.run(ctx)
return pp.run()
},
},
{

View File

@@ -1,7 +1,6 @@
package main
import (
"context"
"fmt"
"log"
"sync"
@@ -38,7 +37,7 @@ func newOtsdbProcessor(oc *opentsdb.Client, im *vm.Importer, otsdbcc int, verbos
}
}
func (op *otsdbProcessor) run(ctx context.Context) error {
func (op *otsdbProcessor) run() error {
log.Println("Loading all metrics from OpenTSDB for filters: ", op.oc.Filters)
var metrics []string
for _, filter := range op.oc.Filters {
@@ -54,7 +53,7 @@ func (op *otsdbProcessor) run(ctx context.Context) error {
}
question := fmt.Sprintf("Found %d metrics to import. Continue?", len(metrics))
if !prompt(ctx, question) {
if !prompt(question) {
return nil
}
op.im.ResetStats()

View File

@@ -1,7 +1,6 @@
package main
import (
"context"
"fmt"
"log"
"sync"
@@ -31,7 +30,7 @@ type prometheusProcessor struct {
isVerbose bool
}
func (pp *prometheusProcessor) run(ctx context.Context) error {
func (pp *prometheusProcessor) run() error {
blocks, err := pp.cl.Explore()
if err != nil {
return fmt.Errorf("explore failed: %s", err)
@@ -40,7 +39,7 @@ func (pp *prometheusProcessor) run(ctx context.Context) error {
return fmt.Errorf("found no blocks to import")
}
question := fmt.Sprintf("Found %d blocks to import. Continue?", len(blocks))
if !prompt(ctx, question) {
if !prompt(question) {
return nil
}

View File

@@ -47,7 +47,7 @@ func (rrp *remoteReadProcessor) run(ctx context.Context) error {
question := fmt.Sprintf("Selected time range %q - %q will be split into %d ranges according to %q step. Continue?",
rrp.filter.timeStart.String(), rrp.filter.timeEnd.String(), len(ranges), rrp.filter.chunk)
if !prompt(ctx, question) {
if !prompt(question) {
return nil
}

View File

@@ -11,15 +11,14 @@ import (
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/gogo/protobuf/proto"
"github.com/golang/snappy"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/prompb"
"github.com/prometheus/prometheus/storage/remote"
"github.com/prometheus/prometheus/tsdb/chunkenc"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
)
const (
@@ -64,9 +63,9 @@ type Config struct {
UseStream bool
// Headers optional HTTP headers to send with each request to the corresponding remote storage
Headers string
// LabelNames, LabelValues stands for label=~value pair used for read requests.
// LabelName, LabelValue stands for label=~value pair used for read requests.
// Is optional.
LabelNames, LabelValues []string
LabelName, LabelValue string
}
// Filter defines a list of filters applied to requested data
@@ -95,22 +94,12 @@ func NewClient(cfg Config) (*Client, error) {
return nil, err
}
var matchers []*prompb.LabelMatcher
if len(cfg.LabelNames) > 0 || len(cfg.LabelValues) > 0 {
if len(cfg.LabelNames) != len(cfg.LabelValues) {
return nil, fmt.Errorf("the number of label names and label values must be the same")
}
for i := range cfg.LabelNames {
if cfg.LabelNames[i] == "" {
return nil, fmt.Errorf("label name cannot be empty")
}
matcher := &prompb.LabelMatcher{
Type: prompb.LabelMatcher_RE,
Name: cfg.LabelNames[i],
Value: cfg.LabelValues[i],
}
matchers = append(matchers, matcher)
var m *prompb.LabelMatcher
if cfg.LabelName != "" && cfg.LabelValue != "" {
m = &prompb.LabelMatcher{
Type: prompb.LabelMatcher_RE,
Name: cfg.LabelName,
Value: cfg.LabelValue,
}
}
@@ -127,7 +116,7 @@ func NewClient(cfg Config) (*Client, error) {
password: cfg.Password,
useStream: cfg.UseStream,
headers: headers,
matchers: matchers,
matchers: []*prompb.LabelMatcher{m},
}
return c, nil

View File

@@ -2,7 +2,6 @@ package main
import (
"bufio"
"context"
"fmt"
"os"
"strings"
@@ -16,7 +15,7 @@ const barTpl = `{{ blue "%s:" }} {{ counters . }} {{ bar . "[" "█" (cycle . "
// isSilent should be inited in main
var isSilent bool
func prompt(ctx context.Context, question string) bool {
func prompt(question string) bool {
if isSilent {
return true
}
@@ -26,32 +25,15 @@ func prompt(ctx context.Context, question string) bool {
}
reader := bufio.NewReader(os.Stdin)
fmt.Print(question, " [Y/n] ")
answerCh := make(chan string, 1)
errCh := make(chan error, 1)
go func() {
answer, err := reader.ReadString('\n')
if err != nil {
errCh <- err
return
}
answerCh <- answer
}()
select {
case <-ctx.Done():
fmt.Println("\nCanceled.")
return false
case err := <-errCh:
answer, err := reader.ReadString('\n')
if err != nil {
panic(err)
case answer := <-answerCh:
answer = strings.TrimSpace(strings.ToLower(answer))
if answer == "" || answer == "yes" || answer == "y" {
return true
}
return false
}
answer = strings.TrimSpace(strings.ToLower(answer))
if answer == "" || answer == "yes" || answer == "y" {
return true
}
return false
}
func wrapErr(vmErr *vm.ImportError, verbose bool) error {

View File

@@ -63,7 +63,10 @@ func (ts *TimeSeries) write(w io.Writer) (int, error) {
// Split long lines with more than 10K samples into multiple JSON lines.
// This should limit memory usage at VictoriaMetrics during data ingestion,
// since it allocates memory for the whole JSON line and processes it in one go.
batchSize := min(10000, len(timestamps))
batchSize := 10000
if batchSize > len(timestamps) {
batchSize = len(timestamps)
}
timestampsBatch := timestamps[:batchSize]
valuesBatch := values[:batchSize]
timestamps = timestamps[batchSize:]

View File

@@ -79,7 +79,7 @@ func (p *vmNativeProcessor) run(ctx context.Context) error {
return fmt.Errorf("failed to get tenants: %w", err)
}
question := fmt.Sprintf("The following tenants were discovered: %s.\n Continue?", tenants)
if !prompt(ctx, question) {
if !prompt(question) {
return nil
}
}
@@ -121,7 +121,7 @@ func (p *vmNativeProcessor) runSingle(ctx context.Context, f native.Filter, srcU
pr := bar.NewProxyReader(reader)
if pr != nil {
reader = pr
fmt.Fprintf(log.Writer(), "Continue import process with filter %s:\n", f.String())
fmt.Printf("Continue import process with filter %s:\n", f.String())
}
}
@@ -191,7 +191,7 @@ func (p *vmNativeProcessor) runBackfilling(ctx context.Context, tenantID string,
initParams = []any{srcURL, dstURL, p.filter.String(), tenantID}
}
fmt.Fprintln(log.Writer(), "") // extra line for better output formatting
fmt.Println("") // extra line for better output formatting
log.Printf(initMessage, initParams...)
if len(ranges) > 1 {
log.Printf("Selected time range will be split into %d ranges according to %q step", len(ranges), p.filter.Chunk)
@@ -233,7 +233,7 @@ func (p *vmNativeProcessor) runBackfilling(ctx context.Context, tenantID string,
// do not prompt for intercluster because there could be many tenants,
// and we don't want to interrupt the process when moving to the next tenant.
question := foundSeriesMsg + ". Continue?"
if !prompt(ctx, question) {
if !prompt(question) {
return nil
}
} else {

View File

@@ -11,11 +11,9 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
)
@@ -52,9 +50,8 @@ var (
type InsertCtx struct {
Labels sortedLabels
mrs []storage.MetricRow
mms []metricsmetadata.Row
metricNameBuf []byte
mrs []storage.MetricRow
metricNamesBuf []byte
relabelCtx relabel.Ctx
streamAggrCtx streamAggrCtx
@@ -76,13 +73,8 @@ func (ctx *InsertCtx) Reset(rowsLen int) {
}
mrs = slicesutil.SetLength(mrs, rowsLen)
ctx.mrs = mrs[:0]
mms := ctx.mms
for i := range mms {
cleanMetricMetadata(&mms[i])
}
ctx.mms = mms[:0]
ctx.metricNameBuf = ctx.metricNameBuf[:0]
ctx.metricNamesBuf = ctx.metricNamesBuf[:0]
ctx.relabelCtx.Reset()
ctx.streamAggrCtx.Reset()
ctx.skipStreamAggr = false
@@ -92,20 +84,11 @@ func cleanMetricRow(mr *storage.MetricRow) {
mr.MetricNameRaw = nil
}
func cleanMetricMetadata(mm *metricsmetadata.Row) {
mm.MetricFamilyName = nil
mm.Unit = nil
mm.Help = nil
mm.Type = 0
mm.ProjectID = 0
mm.AccountID = 0
}
func (ctx *InsertCtx) marshalMetricNameRaw(prefix []byte, labels []prompb.Label) []byte {
start := len(ctx.metricNameBuf)
ctx.metricNameBuf = append(ctx.metricNameBuf, prefix...)
ctx.metricNameBuf = storage.MarshalMetricNameRaw(ctx.metricNameBuf, labels)
metricNameRaw := ctx.metricNameBuf[start:]
start := len(ctx.metricNamesBuf)
ctx.metricNamesBuf = append(ctx.metricNamesBuf, prefix...)
ctx.metricNamesBuf = storage.MarshalMetricNameRaw(ctx.metricNamesBuf, labels)
metricNameRaw := ctx.metricNamesBuf[start:]
return metricNameRaw[:len(metricNameRaw):len(metricNameRaw)]
}
@@ -160,7 +143,7 @@ func (ctx *InsertCtx) addRow(metricNameRaw []byte, timestamp int64, value float6
mr.MetricNameRaw = metricNameRaw
mr.Timestamp = timestamp
mr.Value = value
if len(ctx.metricNameBuf) > 16*1024*1024 {
if len(ctx.metricNamesBuf) > 16*1024*1024 {
if err := ctx.FlushBufs(); err != nil {
return err
}
@@ -168,55 +151,6 @@ func (ctx *InsertCtx) addRow(metricNameRaw []byte, timestamp int64, value float6
return nil
}
// WriteMetadata writes given prometheus protobuf metadata into the storage.
func (ctx *InsertCtx) WriteMetadata(mmpbs []prompb.MetricMetadata) error {
if len(mmpbs) == 0 {
return nil
}
mms := ctx.mms
mms = slicesutil.SetLength(mms, len(mmpbs))
for idx, mmpb := range mmpbs {
mm := &mms[idx]
mm.MetricFamilyName = bytesutil.ToUnsafeBytes(mmpb.MetricFamilyName)
mm.Help = bytesutil.ToUnsafeBytes(mmpb.Help)
mm.Type = mmpb.Type
mm.Unit = bytesutil.ToUnsafeBytes(mmpb.Unit)
}
err := vmstorage.AddMetadataRows(mms)
if err != nil {
return &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("cannot store metrics metadata: %w", err),
StatusCode: http.StatusServiceUnavailable,
}
}
return nil
}
// WritePromMetadata writes given prometheus metric metadata into the storage
func (ctx *InsertCtx) WritePromMetadata(mmps []prometheus.Metadata) error {
if len(mmps) == 0 {
return nil
}
mms := ctx.mms
mms = slicesutil.SetLength(mms, len(mmps))
for idx, mmpb := range mmps {
mm := &mms[idx]
mm.MetricFamilyName = bytesutil.ToUnsafeBytes(mmpb.Metric)
mm.Help = bytesutil.ToUnsafeBytes(mmpb.Help)
mm.Type = mmpb.Type
}
err := vmstorage.AddMetadataRows(mms)
if err != nil {
return &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("cannot store prometheus metrics metadata: %w", err),
StatusCode: http.StatusServiceUnavailable,
}
}
return nil
}
// AddLabelBytes adds (name, value) label to ctx.Labels.
//
// name and value must exist until ctx.Labels is used.
@@ -287,7 +221,7 @@ func (ctx *InsertCtx) FlushBufs() error {
}
}
func (ctx *InsertCtx) dropAggregatedRows(matchIdxs []uint32) {
func (ctx *InsertCtx) dropAggregatedRows(matchIdxs []byte) {
dst := ctx.mrs[:0]
src := ctx.mrs
if !*streamAggrDropInput {
@@ -305,4 +239,4 @@ func (ctx *InsertCtx) dropAggregatedRows(matchIdxs []uint32) {
ctx.mrs = dst
}
var matchIdxsPool slicesutil.BufferPool[uint32]
var matchIdxsPool bytesutil.ByteBufferPool

View File

@@ -13,7 +13,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
"github.com/VictoriaMetrics/metrics"
@@ -23,11 +22,11 @@ var (
streamAggrConfig = flag.String("streamAggr.config", "", "Optional path to file with stream aggregation config. "+
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
"See also -streamAggr.keepInput, -streamAggr.dropInput and -streamAggr.dedupInterval")
streamAggrKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep input samples that match any rule in -streamAggr.config. "+
"By default, matched raw samples are aggregated and dropped, while unmatched samples are written to the remote storage. "+
streamAggrKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep all the input samples after the aggregation with -streamAggr.config. "+
"By default, only aggregated samples are dropped, while the remaining samples are stored in the database. "+
"See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop input samples that not matching any rule in -streamAggr.config. "+
"By default, only matched raw samples are dropped, while unmatched samples are written to the remote storage."+
streamAggrDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop all the input samples after the aggregation with -streamAggr.config. "+
"By default, only aggregated samples are dropped, while the remaining samples are stored in the database. "+
"See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrDedupInterval = flag.Duration("streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before optional aggregation with -streamAggr.config . "+
"See also -streamAggr.dropInputLabels and -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
@@ -190,7 +189,7 @@ func (ctx *streamAggrCtx) Reset() {
ctx.buf = ctx.buf[:0]
}
func (ctx *streamAggrCtx) push(mrs []storage.MetricRow, matchIdxs []uint32) []uint32 {
func (ctx *streamAggrCtx) push(mrs []storage.MetricRow, matchIdxs []byte) []byte {
mn := &ctx.mn
tss := ctx.tss
labels := ctx.labels
@@ -249,7 +248,7 @@ func (ctx *streamAggrCtx) push(mrs []storage.MetricRow, matchIdxs []uint32) []ui
if sas.IsEnabled() {
matchIdxs = sas.Push(tss, matchIdxs)
} else if deduplicator != nil {
matchIdxs = slicesutil.SetLength(matchIdxs, len(tss))
matchIdxs = bytesutil.ResizeNoCopyMayOverallocate(matchIdxs, len(tss))
for i := range matchIdxs {
matchIdxs[i] = 1
}

View File

@@ -27,7 +27,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/promremotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/relabel"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/vmimport"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/zabbixconnector"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
@@ -232,17 +231,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
}
firehose.WriteSuccessResponse(w, r)
return true
case "zabbixconnector/api/v1/history":
zabbixconnectorHistoryRequests.Inc()
if err := zabbixconnector.InsertHandlerForHTTP(r); err != nil {
zabbixconnectorHistoryErrors.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintf(w, `{"error":%q}`, err.Error())
return true
}
w.WriteHeader(http.StatusAccepted)
return true
case "/newrelic":
newrelicCheckRequest.Inc()
w.Header().Set("Content-Type", "application/json")
@@ -435,9 +423,6 @@ var (
opentelemetryPushRequests = metrics.NewCounter(`vm_http_requests_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
opentelemetryPushErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
zabbixconnectorHistoryRequests = metrics.NewCounter(`vm_http_requests_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
zabbixconnectorHistoryErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
newrelicWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
newrelicWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)

Some files were not shown because too many files have changed in this diff Show More