Compare commits

..

6 Commits

Author SHA1 Message Date
Max Kotliar
9430395c18 wip 2025-11-14 22:35:41 +02:00
Max Kotliar
d9e26de0a6 wip 2025-11-14 22:33:14 +02:00
Max Kotliar
13e7e04727 wip 2025-11-14 22:03:18 +02:00
Max Kotliar
f8b1b918c4 wip 2025-11-14 21:47:17 +02:00
Max Kotliar
db52cca9df release: wip 2025-11-14 20:32:41 +02:00
Max Kotliar
cdc374d8dd docs: Release guide slight automation
The commit automates several steps so they can be copy-pasted and
executed without any manual adjustments.

On success, it prints a message starting with “SUCCESS:” to make it
easier to distinguish from failures.

It also introduces the CURR_TAG and NEXT_TAG environment variables,
which are used by the automated commands.
2025-11-13 15:46:09 +02:00
2708 changed files with 175513 additions and 199323 deletions

View File

@@ -5,7 +5,7 @@ body:
- type: textarea
id: describe-the-component
attributes:
label: Is your question related to a specific component?
label: Is your question request related to a specific component?
placeholder: |
VictoriaMetrics, vmagent, vmalert, vmui, etc...
validations:

View File

@@ -1,48 +0,0 @@
#!/usr/bin/env sh
set -e
CHANGELOG_FILE="docs/victoriametrics/changelog/CHANGELOG.md"
GITHUB_BASE_REF=${GITHUB_BASE_REF:-"master"}
GIT_REMOTE=${GIT_REMOTE:-"origin"}
git diff "${GIT_REMOTE}/${GITHUB_BASE_REF}"...HEAD -- $CHANGELOG_FILE > diff.txt
if ! grep -q "^+" diff.txt; then
echo "No additions in CHANGELOG.md"
exit 0
fi
ADDED_LINES=$(grep "^+\S" diff.txt | sed 's/^+//')
START_TIP=$(grep -n "^## tip" "$CHANGELOG_FILE" | head -1 | cut -d: -f1)
if [ -z "$START_TIP" ]; then
echo "ERROR: ${CHANGELOG_FILE} does not contain a ## tip section"
exit 1
fi
END_TIP=$(awk "NR>$START_TIP && /^## / {print NR; exit}" "${CHANGELOG_FILE}")
if [ -z "$END_TIP" ]; then
END_TIP=$(wc -l < "$CHANGELOG_FILE")
fi
BAD=0
while IFS= read -r line; do
# Grep exact line inside the file and get line numbers
MATCHES=$(grep -n -F "$line" "$CHANGELOG_FILE" | cut -d: -f1)
for m in $MATCHES; do
if [ "$m" -lt "$START_TIP" ] || [ "$m" -gt "$END_TIP" ]; then
echo "'$line' on line ${m} is outside ## tip section (lines ${START_TIP}-${END_TIP})"
BAD=1
fi
done
done << EOF
$ADDED_LINES
EOF
if [ "$BAD" -ne 0 ]; then
echo "CHANGELOG modifications must be placed inside the ## tip section."
exit 1
fi
echo "CHANGELOG modifications are valid."

View File

@@ -61,7 +61,7 @@ jobs:
arch: amd64
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Setup Go
id: go
@@ -71,8 +71,7 @@ jobs:
go.sum
Makefile
app/**/Makefile
go-version-file: 'go.mod'
- run: go version
go-version: stable
- name: Build victoria-metrics for ${{ matrix.os }}-${{ matrix.arch }}
run: make victoria-metrics-${{ matrix.os }}-${{ matrix.arch }}

View File

@@ -1,19 +0,0 @@
name: 'changelog-linter'
on:
pull_request:
paths:
- "docs/victoriametrics/changelog/CHANGELOG.md"
jobs:
tip-lint:
runs-on: 'ubuntu-latest'
steps:
- uses: 'actions/checkout@v6'
with:
# needed for proper diff
fetch-depth: 0
- name: 'Validate that changelog changes are under ## tip'
run: |
GITHUB_BASE_REF=${{ github.base_ref }} ./.github/scripts/lint-changelog-tip.sh

View File

@@ -8,7 +8,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
fetch-depth: 0 # we need full history for commit verification

View File

@@ -21,11 +21,9 @@ jobs:
id: go
uses: actions/setup-go@v6
with:
go-version-file: 'go.mod'
go-version: stable
cache: false
- run: go version
- name: Cache Go artifacts
uses: actions/cache@v4
with:
@@ -34,7 +32,7 @@ jobs:
~/go/pkg/mod
~/go/bin
key: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-
- name: Check License
run: make check-licenses

View File

@@ -29,15 +29,14 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Set up Go
id: go
uses: actions/setup-go@v6
with:
cache: false
go-version-file: 'go.mod'
- run: go version
go-version: stable
- name: Cache Go artifacts
uses: actions/cache@v4
@@ -47,7 +46,7 @@ jobs:
~/go/bin
~/go/pkg/mod
key: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-
- name: Initialize CodeQL
uses: github/codeql-action/init@v4

View File

@@ -16,12 +16,12 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
path: __vm
- name: Checkout private code
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
repository: VictoriaMetrics/vmdocs
token: ${{ secrets.VM_BOT_GH_TOKEN }}

View File

@@ -32,7 +32,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Setup Go
id: go
@@ -42,9 +42,8 @@ jobs:
go.sum
Makefile
app/**/Makefile
go-version-file: 'go.mod'
go-version: stable
- run: go version
- name: Cache golangci-lint
uses: actions/cache@v4
@@ -52,7 +51,7 @@ jobs:
path: |
~/.cache/golangci-lint
~/go/bin
key: golangci-lint-${{ runner.os }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('.golangci.yml') }}
key: golangci-lint-${{ runner.os }}-${{ hashFiles('.golangci.yml') }}
- name: Run check-all
run: |
@@ -72,7 +71,7 @@ jobs:
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Setup Go
id: go
@@ -82,24 +81,23 @@ jobs:
go.sum
Makefile
app/**/Makefile
go-version-file: 'go.mod'
- run: go version
go-version: stable
- name: Run tests
run: make ${{ matrix.scenario}}
run: GOGC=10 make ${{ matrix.scenario}}
- name: Publish coverage
uses: codecov/codecov-action@v5
with:
files: ./coverage.txt
apptest:
name: apptest
runs-on: apptest
integration:
name: integration
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Setup Go
id: go
@@ -109,8 +107,7 @@ jobs:
go.sum
Makefile
app/**/Makefile
go-version-file: 'go.mod'
- run: go version
go-version: stable
- name: Run app tests
run: make apptest
- name: Run integration tests
run: make integration-test

View File

@@ -32,41 +32,35 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Cache node_modules
id: cache
uses: actions/cache@v5
- name: Setup Node
uses: actions/setup-node@v6
with:
path: app/vmui/packages/vmui/node_modules
key: vmui-deps-${{ runner.os }}-${{ hashFiles('app/vmui/packages/vmui/package-lock.json', 'app/vmui/Dockerfile-build') }}
restore-keys: |
vmui-deps-${{ runner.os }}-
node-version: '24.x'
- name: Install dependencies
if: steps.cache.outputs.cache-hit != 'true'
run: make vmui-install
- name: Cache node-modules
uses: actions/cache@v4
with:
path: |
app/vmui/packages/vmui/node_modules
key: vmui-artifacts-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
restore-keys: vmui-artifacts-${{ runner.os }}-
- name: Run lint
id: lint
run: make vmui-lint
continue-on-error: true
env:
VMUI_SKIP_INSTALL: true
- name: Run tests
id: test
run: make vmui-test
continue-on-error: true
env:
VMUI_SKIP_INSTALL: true
- name: Run typecheck
id: typecheck
run: make vmui-typecheck
continue-on-error: true
env:
VMUI_SKIP_INSTALL: true
- name: Annotate Code Linting Results
uses: ataylorme/eslint-annotate-action@v3

View File

@@ -175,7 +175,7 @@
END OF TERMS AND CONDITIONS
Copyright 2019-2026 VictoriaMetrics, Inc.
Copyright 2019-2025 VictoriaMetrics, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@@ -17,7 +17,7 @@ EXTRA_GO_BUILD_TAGS ?=
GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TAG)-$(BUILDINFO_TAG)'
TAR_OWNERSHIP ?= --owner=1000 --group=1000
GOLANGCI_LINT_VERSION := 2.9.0
GOLANGCI_LINT_VERSION := 2.4.0
.PHONY: $(MAKECMDGOALS)
@@ -435,7 +435,7 @@ release-vmutils-windows-goarch: \
vmctl-windows-$(GOARCH)-prod.exe
pprof-cpu:
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics $(PPROF_FILE)
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
fmt:
gofmt -l -w -s ./lib
@@ -443,7 +443,7 @@ fmt:
gofmt -l -w -s ./apptest
vet:
go vet -tags 'synctest' ./lib/...
GOEXPERIMENT=synctest go vet ./lib/...
go vet ./app/...
go vet ./apptest/...
@@ -452,52 +452,39 @@ check-all: fmt vet golangci-lint govulncheck
clean-checkers: remove-golangci-lint remove-govulncheck
test:
go test -tags 'synctest' ./lib/... ./app/...
GOEXPERIMENT=synctest go test ./lib/... ./app/...
test-race:
go test -tags 'synctest' -race ./lib/... ./app/...
GOEXPERIMENT=synctest go test -race ./lib/... ./app/...
test-pure:
CGO_ENABLED=0 go test -tags 'synctest' ./lib/... ./app/...
GOEXPERIMENT=synctest CGO_ENABLED=0 go test ./lib/... ./app/...
test-full:
go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
GOEXPERIMENT=synctest go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
test-full-386:
GOARCH=386 go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
GOEXPERIMENT=synctest GOARCH=386 go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
integration-test:
$(MAKE) apptest
apptest:
$(MAKE) victoria-metrics vmagent vmalert vmauth vmctl vmbackup vmrestore
go test ./apptest/... -skip="^Test(Cluster|Legacy).*"
apptest-legacy: victoria-metrics vmbackup vmrestore
OS=$$(uname | tr '[:upper:]' '[:lower:]'); \
ARCH=$$(uname -m | tr '[:upper:]' '[:lower:]' | sed 's/x86_64/amd64/'); \
VERSION=v1.132.0; \
VMSINGLE=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}.tar.gz; \
VMCLUSTER=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}-cluster.tar.gz; \
URL=https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/$${VERSION}; \
DIR=/tmp/$${VERSION}; \
test -d $${DIR} || (mkdir $${DIR} && \
curl --output-dir /tmp -LO $${URL}/$${VMSINGLE} && tar xzf /tmp/$${VMSINGLE} -C $${DIR} && \
curl --output-dir /tmp -LO $${URL}/$${VMCLUSTER} && tar xzf /tmp/$${VMCLUSTER} -C $${DIR} \
); \
VM_LEGACY_VMSINGLE_PATH=$${DIR}/victoria-metrics-prod \
VM_LEGACY_VMSTORAGE_PATH=$${DIR}/vmstorage-prod \
go test ./apptest/tests -run="^TestLegacySingle.*"
go test ./apptest/... -skip="^TestCluster.*"
benchmark:
go test -run=NO_TESTS -bench=. ./lib/...
go test -run=NO_TESTS -bench=. ./app/...
GOEXPERIMENT=synctest go test -bench=. ./lib/...
go test -bench=. ./app/...
benchmark-pure:
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./lib/...
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./app/...
GOEXPERIMENT=synctest CGO_ENABLED=0 go test -bench=. ./lib/...
CGO_ENABLED=0 go test -bench=. ./app/...
vendor-update:
go get -u ./lib/...
go get -u ./app/...
go mod tidy -compat=1.26
go mod tidy -compat=1.24
go mod vendor
app-local:
@@ -513,15 +500,14 @@ app-local-windows-goarch:
CGO_ENABLED=0 GOOS=windows GOARCH=$(GOARCH) go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)-windows-$(GOARCH)$(RACE).exe $(PKG_PREFIX)/app/$(APP_NAME)
quicktemplate-gen: install-qtc
qtc -dir=lib
qtc -dir=app
qtc
install-qtc:
which qtc || go install github.com/valyala/quicktemplate/qtc@latest
golangci-lint: install-golangci-lint
golangci-lint run --build-tags 'synctest'
GOEXPERIMENT=synctest golangci-lint run
install-golangci-lint:
which golangci-lint && (golangci-lint --version | grep -q $(GOLANGCI_LINT_VERSION)) || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v$(GOLANGCI_LINT_VERSION)

View File

@@ -16,21 +16,16 @@
<img src="docs/victoriametrics/logo.webp" width="300" alt="VictoriaMetrics logo">
</picture>
VictoriaMetrics is a fast, cost-effective, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
VictoriaMetrics is a fast, cost-saving, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
Here are some resources and information about VictoriaMetrics:
- **Case studies**: [Grammarly, Roblox, Wix, Spotify,...](https://docs.victoriametrics.com/victoriametrics/casestudies/).
- **Available**: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), Docker images on [Docker Hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and [Quay](https://quay.io/repository/victoriametrics/victoria-metrics), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics).
- **Deployment types**: [Single-node version](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and [Cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) under [Apache License 2.0](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE).
- **Getting started:** Read [key concepts](https://docs.victoriametrics.com/victoriametrics/keyconcepts/) and follow the
[quick start guide](https://docs.victoriametrics.com/victoriametrics/quick-start/).
- **Community**: [Slack](https://slack.victoriametrics.com/) (join via [Slack Inviter](https://slack.victoriametrics.com/)), [X (Twitter)](https://x.com/VictoriaMetrics), [YouTube](https://www.youtube.com/@VictoriaMetrics). See full list [here](https://docs.victoriametrics.com/victoriametrics/#community-and-contributions).
- **Changelog**: Project evolves fast - check the [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/), and [How to upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics).
- **Enterprise support:** [Contact us](mailto:info@victoriametrics.com) for commercial support with additional [enterprise features](https://docs.victoriametrics.com/victoriametrics/enterprise/).
- **Enterprise releases:** Enterprise and [long-term support releases (LTS)](https://docs.victoriametrics.com/victoriametrics/lts-releases/) are publicly available and can be evaluated for free
using a [free trial license](https://victoriametrics.com/products/enterprise/trial/).
- **Security:** we achieved [security certifications](https://victoriametrics.com/security/) for Database Software Development and Software-Based Monitoring Services.
- Documentation: [docs.victoriametrics.com](https://docs.victoriametrics.com)
- Case studies: [Grammarly, Roblox, Wix,...](https://docs.victoriametrics.com/victoriametrics/casestudies/).
- Available: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), docker images [Docker Hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and [Quay](https://quay.io/repository/victoriametrics/victoria-metrics), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics)
- Deployment types: [Single-node version](https://docs.victoriametrics.com/), [Cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/), and [Enterprise version](https://docs.victoriametrics.com/victoriametrics/enterprise/)
- Changelog: [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/), and [How to upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics)
- Community: [Slack](https://slack.victoriametrics.com/), [X (Twitter)](https://x.com/VictoriaMetrics), [LinkedIn](https://www.linkedin.com/company/victoriametrics/), [YouTube](https://www.youtube.com/@VictoriaMetrics)
Yes, we open-source both the single-node VictoriaMetrics and the cluster version.

View File

@@ -134,7 +134,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
w.Header().Add("Content-Type", "text/html; charset=utf-8")
fmt.Fprintf(w, "<h2>Single-node VictoriaMetrics</h2></br>")
fmt.Fprintf(w, "Version %s<br>", buildinfo.Version)
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/'>https://docs.victoriametrics.com/</a></br>")
fmt.Fprintf(w, "Useful endpoints:</br>")
httpserver.WriteAPIHelp(w, [][2]string{

View File

@@ -10,11 +10,9 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
)
@@ -29,9 +27,11 @@ var selfScraperWG sync.WaitGroup
func startSelfScraper() {
selfScraperStopCh = make(chan struct{})
selfScraperWG.Go(func() {
selfScraperWG.Add(1)
go func() {
defer selfScraperWG.Done()
selfScraper(*selfScrapeInterval)
})
}()
}
func stopSelfScraper() {
@@ -48,7 +48,6 @@ func selfScraper(scrapeInterval time.Duration) {
var bb bytesutil.ByteBuffer
var rows prometheus.Rows
var metadataRows prometheus.MetadataRows
var mrs []storage.MetricRow
var labels []prompb.Label
t := time.NewTicker(scrapeInterval)
@@ -58,12 +57,8 @@ func selfScraper(scrapeInterval time.Duration) {
appmetrics.WritePrometheusMetrics(&bb)
s := bytesutil.ToUnsafeString(bb.B)
rows.Reset()
// Parse metrics and optionally metadata when enabled
if prommetadata.IsEnabled() {
rows, metadataRows = prometheus.UnmarshalWithMetadata(rows, metadataRows, s, nil)
} else {
rows.UnmarshalWithErrLogger(s, nil)
}
// VictoriaMetrics components don't expose metadata yet, only need to parse samples
rows.UnmarshalWithErrLogger(s, nil)
mrs = mrs[:0]
for i := range rows.Rows {
r := &rows.Rows[i]
@@ -96,19 +91,6 @@ func selfScraper(scrapeInterval time.Duration) {
if err := vmstorage.AddRows(mrs); err != nil {
logger.Errorf("cannot store self-scraped metrics: %s", err)
}
if len(metadataRows.Rows) > 0 {
mms := make([]metricsmetadata.Row, 0, len(metadataRows.Rows))
for _, mm := range metadataRows.Rows {
mms = append(mms, metricsmetadata.Row{
MetricFamilyName: bytesutil.ToUnsafeBytes(mm.Metric),
Help: bytesutil.ToUnsafeBytes(mm.Help),
Type: mm.Type,
})
}
if err := vmstorage.AddMetadataRows(mms); err != nil {
logger.Errorf("cannot store self-scraped metrics metadata: %s", err)
}
}
}
for {
select {

View File

@@ -33,13 +33,13 @@ func PopulateTimeTpl(b []byte, tGlobal time.Time) []byte {
}
switch strings.TrimSpace(parts[0]) {
case `TIME_S`:
return fmt.Appendf(nil, "%d", t.Unix())
return []byte(fmt.Sprintf("%d", t.Unix()))
case `TIME_MSZ`:
return fmt.Appendf(nil, "%d", t.Unix()*1e3)
return []byte(fmt.Sprintf("%d", t.Unix()*1e3))
case `TIME_MS`:
return fmt.Appendf(nil, "%d", timeToMillis(t))
return []byte(fmt.Sprintf("%d", timeToMillis(t)))
case `TIME_NS`:
return fmt.Appendf(nil, "%d", t.UnixNano())
return []byte(fmt.Sprintf("%d", t.UnixNano()))
default:
log.Fatalf("unknown time pattern %s in %s", parts[0], repl)
}

View File

@@ -27,7 +27,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/promremotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/vmimport"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/zabbixconnector"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
@@ -245,7 +244,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
w.Header().Add("Content-Type", "text/html; charset=utf-8")
fmt.Fprintf(w, "<h2>vmagent</h2>")
fmt.Fprintf(w, "Version %s<br>", buildinfo.Version)
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/victoriametrics/vmagent/'>https://docs.victoriametrics.com/victoriametrics/vmagent/</a></br>")
fmt.Fprintf(w, "Useful endpoints:</br>")
httpserver.WriteAPIHelp(w, [][2]string{
@@ -352,17 +350,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
firehose.WriteSuccessResponse(w, r)
return true
case "/zabbixconnector/api/v1/history":
zabbixconnectorHistoryRequests.Inc()
if err := zabbixconnector.InsertHandlerForHTTP(nil, r); err != nil {
zabbixconnectorHistoryErrors.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintf(w, `{"error":%q}`, err.Error())
return true
}
w.WriteHeader(http.StatusOK)
return true
case "/newrelic":
newrelicCheckRequest.Inc()
w.Header().Set("Content-Type", "application/json")
@@ -657,17 +644,6 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
}
firehose.WriteSuccessResponse(w, r)
return true
case "zabbixconnector/api/v1/history":
zabbixconnectorHistoryRequests.Inc()
if err := zabbixconnector.InsertHandlerForHTTP(at, r); err != nil {
zabbixconnectorHistoryErrors.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintf(w, `{"error":%q}`, err.Error())
return true
}
w.WriteHeader(http.StatusOK)
return true
case "newrelic":
newrelicCheckRequest.Inc()
w.Header().Set("Content-Type", "application/json")
@@ -789,9 +765,6 @@ var (
opentelemetryPushRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
opentelemetryPushErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
zabbixconnectorHistoryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
zabbixconnectorHistoryErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
newrelicWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
newrelicWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)

View File

@@ -78,7 +78,7 @@ func insertRows(at *auth.Token, rows []newrelic.Row, extraLabels []prompb.Label)
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
rowsInserted.Add(samplesCount)
rowsInserted.Add(len(rows))
if at != nil {
rowsTenantInserted.Get(at).Add(samplesCount)
}

View File

@@ -25,7 +25,7 @@ var (
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="opentelemetry"}`)
)
// InsertHandlerForReader processes metrics from given reader.
// InsertHandler processes metrics from given reader.
func InsertHandlerForReader(at *auth.Token, r io.Reader, encoding string) error {
return stream.ParseStream(r, encoding, nil, func(tss []prompb.TimeSeries, mms []prompb.MetricMetadata) error {
return insertRows(at, tss, mms, nil)

View File

@@ -15,6 +15,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/awsapi"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
@@ -202,10 +203,14 @@ func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL))
c.sendDuration = metrics.GetOrCreateFloatCounter(fmt.Sprintf(`vmagent_remotewrite_send_duration_seconds_total{url=%q}`, c.sanitizedURL))
metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queues{url=%q}`, c.sanitizedURL), func() float64 {
return float64(concurrency)
return float64(*queues)
})
for range concurrency {
c.wg.Go(c.runWorker)
for i := 0; i < concurrency; i++ {
c.wg.Add(1)
go func() {
defer c.wg.Done()
c.runWorker()
}()
}
logger.Infof("initialized client for -remoteWrite.url=%q", c.sanitizedURL)
}
@@ -549,9 +554,9 @@ func getRetryDuration(retryAfterDuration, retryDuration, maxRetryDuration time.D
// For more details, see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9417
func repackBlockFromZstdToSnappy(zstdBlock []byte) ([]byte, error) {
plainBlock := make([]byte, 0, len(zstdBlock)*2)
plainBlock, err := encoding.DecompressZSTD(plainBlock, zstdBlock)
plainBlock, err := zstd.Decompress(plainBlock, zstdBlock)
if err != nil {
return nil, err
return nil, fmt.Errorf("zstd: decompress: %s", err)
}
return snappy.Encode(nil, plainBlock), nil

View File

@@ -18,7 +18,7 @@ func TestCalculateRetryDuration(t *testing.T) {
f := func(retryAfterDuration, retryDuration time.Duration, n int, expectMinDuration time.Duration) {
t.Helper()
for range n {
for i := 0; i < n; i++ {
retryDuration = getRetryDuration(retryAfterDuration, retryDuration, time.Minute)
}

View File

@@ -48,7 +48,11 @@ func newPendingSeries(fq *persistentqueue.FastQueue, isVMRemoteWrite *atomic.Boo
ps.wr.significantFigures = significantFigures
ps.wr.roundDigits = roundDigits
ps.stopCh = make(chan struct{})
ps.periodicFlusherWG.Go(ps.periodicFlusher)
ps.periodicFlusherWG.Add(1)
go func() {
defer ps.periodicFlusherWG.Done()
ps.periodicFlusher()
}()
return &ps
}

View File

@@ -51,9 +51,9 @@ func testPushWriteRequest(t *testing.T, rowsCount, expectedBlockLenProm, expecte
func newTestWriteRequest(seriesCount, labelsCount int) *prompb.WriteRequest {
var wr prompb.WriteRequest
for i := range seriesCount {
for i := 0; i < seriesCount; i++ {
var labels []prompb.Label
for j := range labelsCount {
for j := 0; j < labelsCount; j++ {
labels = append(labels, prompb.Label{
Name: fmt.Sprintf("label_%d_%d", i, j),
Value: fmt.Sprintf("value_%d_%d", i, j),

View File

@@ -9,18 +9,19 @@ import (
"sync"
"sync/atomic"
"github.com/VictoriaMetrics/metrics"
"gopkg.in/yaml.v2"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"go.yaml.in/yaml/v3"
"github.com/VictoriaMetrics/metrics"
)
var (
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to all -remoteWrite.url.")
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. "+
"Pass multiple -remoteWrite.label flags in order to add multiple labels to metrics before sending them to remote storage")
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabeling configs, which are applied "+
"to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. "+
"The path can point either to local file or to http url. "+
@@ -38,7 +39,7 @@ var (
labelsGlobal []prompb.Label
remoteWriteRelabelConfigData atomic.Pointer[[]byte]
remoteWriteURLRelabelConfigData atomic.Pointer[[]any]
remoteWriteURLRelabelConfigData atomic.Pointer[[]interface{}]
relabelConfigReloads *metrics.Counter
relabelConfigReloadErrors *metrics.Counter
@@ -90,8 +91,8 @@ func WriteURLRelabelConfigData(w io.Writer) {
return
}
type urlRelabelCfg struct {
Url string `yaml:"url"`
RelabelConfig any `yaml:"relabel_config"`
Url string `yaml:"url"`
RelabelConfig interface{} `yaml:"relabel_config"`
}
var cs []urlRelabelCfg
for i, url := range *remoteWriteURLs {
@@ -138,13 +139,12 @@ func loadRelabelConfigs() (*relabelConfigs, error) {
remoteWriteRelabelConfigData.Store(&rawCfg)
rcs.global = global
}
if len(*relabelConfigPaths) > len(*remoteWriteURLs) {
return nil, fmt.Errorf("too many -remoteWrite.urlRelabelConfig args: %d; it mustn't exceed the number of -remoteWrite.url args: %d",
len(*relabelConfigPaths), (len(*remoteWriteURLs)))
}
var urlRelabelCfgs []any
var urlRelabelCfgs []interface{}
rcs.perURL = make([]*promrelabel.ParsedConfigs, len(*remoteWriteURLs))
for i, path := range *relabelConfigPaths {
if len(path) == 0 {
@@ -157,7 +157,7 @@ func loadRelabelConfigs() (*relabelConfigs, error) {
}
rcs.perURL[i] = prc
var parsedCfg any
var parsedCfg interface{}
_ = yaml.Unmarshal(rawCfg, &parsedCfg)
urlRelabelCfgs = append(urlRelabelCfgs, parsedCfg)
}
@@ -176,9 +176,19 @@ type relabelConfigs struct {
perURL []*promrelabel.ParsedConfigs
}
// isSet indicates whether (global or per-URL) command-line flags is set
func (rcs *relabelConfigs) isSet() bool {
return *relabelConfigPathGlobal != "" || len(*relabelConfigPaths) > 0
if rcs == nil {
return false
}
if rcs.global.Len() > 0 {
return true
}
for _, pc := range rcs.perURL {
if pc.Len() > 0 {
return true
}
}
return false
}
// initLabelsGlobal must be called after parsing command-line flags.

View File

@@ -59,7 +59,7 @@ var (
"See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue")
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
"Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.")
queues = flagutil.NewArrayInt("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
queues = flag.Int("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
"isn't enough for sending high volume of collected data to remote storage. "+
"Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage")
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
@@ -176,6 +176,13 @@ func Init() {
})
}
if *queues > maxQueues {
*queues = maxQueues
}
if *queues <= 0 {
*queues = 1
}
if len(*shardByURLLabels) > 0 && len(*shardByURLIgnoreLabels) > 0 {
logger.Fatalf("-remoteWrite.shardByURL.labels and -remoteWrite.shardByURL.ignoreLabels cannot be set simultaneously; " +
"see https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages")
@@ -208,7 +215,9 @@ func Init() {
dropDanglingQueues()
// Start config reloader.
configReloaderWG.Go(func() {
configReloaderWG.Add(1)
go func() {
defer configReloaderWG.Done()
for {
select {
case <-configReloaderStopCh:
@@ -218,7 +227,7 @@ func Init() {
reloadRelabelConfigs()
reloadStreamAggrConfigs()
}
})
}()
}
func dropDanglingQueues() {
@@ -258,6 +267,17 @@ func initRemoteWriteCtxs(urls []string) {
if len(urls) == 0 {
logger.Panicf("BUG: urls must be non-empty")
}
maxInmemoryBlocks := memory.Allowed() / len(urls) / *maxRowsPerBlock / 100
if maxInmemoryBlocks / *queues > 100 {
// There is no much sense in keeping higher number of blocks in memory,
// since this means that the producer outperforms consumer and the queue
// will continue growing. It is better storing the queue to file.
maxInmemoryBlocks = 100 * *queues
}
if maxInmemoryBlocks < 2 {
maxInmemoryBlocks = 2
}
rwctxs := make([]*remoteWriteCtx, len(urls))
rwctxIdx := make([]int, len(urls))
if retryMaxTime.String() != "" {
@@ -272,7 +292,7 @@ func initRemoteWriteCtxs(urls []string) {
if *showRemoteWriteURL {
sanitizedURL = fmt.Sprintf("%d:%s", i+1, remoteWriteURL)
}
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, sanitizedURL)
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
rwctxIdx[i] = i
}
@@ -538,9 +558,11 @@ func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.Metr
// Push metadata to remote storage systems in parallel to reduce
// the time needed for sending the data to multiple remote storage systems.
var wg sync.WaitGroup
wg.Add(len(rwctxs))
var anyPushFailed atomic.Bool
for _, rwctx := range rwctxs {
wg.Go(func() {
go func(rwctx *remoteWriteCtx) {
defer wg.Done()
if !rwctx.tryPushMetadataInternal(mms) {
rwctx.pushFailures.Inc()
if forceDropSamplesOnFailure {
@@ -549,7 +571,7 @@ func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.Metr
}
anyPushFailed.Store(true)
}
})
}(rwctx)
}
wg.Wait()
return !anyPushFailed.Load()
@@ -581,13 +603,15 @@ func tryPushTimeSeriesToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prom
// Push tssBlock to remote storage systems in parallel to reduce
// the time needed for sending the data to multiple remote storage systems.
var wg sync.WaitGroup
wg.Add(len(rwctxs))
var anyPushFailed atomic.Bool
for _, rwctx := range rwctxs {
wg.Go(func() {
go func(rwctx *remoteWriteCtx) {
defer wg.Done()
if !rwctx.TryPushTimeSeries(tssBlock, forceDropSamplesOnFailure) {
anyPushFailed.Store(true)
}
})
}(rwctx)
}
wg.Wait()
return !anyPushFailed.Load()
@@ -609,11 +633,13 @@ func tryShardingTimeSeriesAmongRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock
if len(shard) == 0 {
continue
}
wg.Go(func() {
if !rwctx.TryPushTimeSeries(shard, forceDropSamplesOnFailure) {
wg.Add(1)
go func(rwctx *remoteWriteCtx, tss []prompb.TimeSeries) {
defer wg.Done()
if !rwctx.TryPushTimeSeries(tss, forceDropSamplesOnFailure) {
anyPushFailed.Store(true)
}
})
}(rwctx, shard)
}
wg.Wait()
return !anyPushFailed.Load()
@@ -822,7 +848,7 @@ type remoteWriteCtx struct {
rowsDroppedOnPushFailure *metrics.Counter
}
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string) *remoteWriteCtx {
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks int, sanitizedURL string) *remoteWriteCtx {
// strip query params, otherwise changing params resets pq
pqURL := *remoteWriteURL
pqURL.RawQuery = ""
@@ -837,23 +863,6 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
}
isPQDisabled := disableOnDiskQueue.GetOptionalArg(argIdx)
queuesSize := queues.GetOptionalArg(argIdx)
if queuesSize > maxQueues {
queuesSize = maxQueues
} else if queuesSize <= 0 {
queuesSize = 1
}
maxInmemoryBlocks := memory.Allowed() / len(*remoteWriteURLs) / *maxRowsPerBlock / 100
if maxInmemoryBlocks/queuesSize > 100 {
// There is no much sense in keeping higher number of blocks in memory,
// since this means that the producer outperforms consumer and the queue
// will continue growing. It is better storing the queue to file.
maxInmemoryBlocks = 100 * queuesSize
}
if maxInmemoryBlocks < 2 {
maxInmemoryBlocks = 2
}
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, isPQDisabled)
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
return float64(fq.GetPendingBytes())
@@ -871,16 +880,16 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
var c *client
switch remoteWriteURL.Scheme {
case "http", "https":
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, queuesSize)
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, *queues)
default:
logger.Fatalf("unsupported scheme: %s for remoteWriteURL: %s, want `http`, `https`", remoteWriteURL.Scheme, sanitizedURL)
}
c.init(argIdx, queuesSize, sanitizedURL)
c.init(argIdx, *queues, sanitizedURL)
// Initialize pss
sf := significantFigures.GetOptionalArg(argIdx)
rd := roundDigits.GetOptionalArg(argIdx)
pssLen := queuesSize
pssLen := *queues
if n := cgroup.AvailableCPUs(); pssLen > n {
// There is no sense in running more than availableCPUs concurrent pendingSeries,
// since every pendingSeries can saturate up to a single CPU.
@@ -1080,7 +1089,7 @@ func (rwctx *remoteWriteCtx) tryPushTimeSeriesInternal(tss []prompb.TimeSeries)
}()
if len(labelsGlobal) > 0 {
// Make a copy of tss before adding extra labels to prevent
// Make a copy of tss before adding extra labels in order to prevent
// from affecting time series for other remoteWrite.url configs.
rctx = getRelabelCtx()
v = tssPool.Get().(*[]prompb.TimeSeries)

View File

@@ -28,12 +28,12 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
itemsCount := 1_000 * bucketsCount
m := make([]int, bucketsCount)
var labels []prompb.Label
for i := range itemsCount {
for i := 0; i < itemsCount; i++ {
labels = append(labels[:0], prompb.Label{
Name: "__name__",
Value: fmt.Sprintf("some_name_%d", i),
})
for j := range 10 {
for j := 0; j < 10; j++ {
labels = append(labels, prompb.Label{
Name: fmt.Sprintf("label_%d", j),
Value: fmt.Sprintf("value_%d_%d", i, j),
@@ -248,7 +248,7 @@ func TestShardAmountRemoteWriteCtx(t *testing.T) {
seriesCount := 100000
// build 1000000 series
tssBlock := make([]prompb.TimeSeries, 0, seriesCount)
for i := range seriesCount {
for i := 0; i < seriesCount; i++ {
tssBlock = append(tssBlock, prompb.TimeSeries{
Labels: []prompb.Label{
{
@@ -269,7 +269,7 @@ func TestShardAmountRemoteWriteCtx(t *testing.T) {
// build active time series set
nodes := make([]string, 0, remoteWriteCount)
activeTimeSeriesByNodes := make([]map[string]struct{}, remoteWriteCount)
for i := range remoteWriteCount {
for i := 0; i < remoteWriteCount; i++ {
nodes = append(nodes, fmt.Sprintf("node%d", i))
activeTimeSeriesByNodes[i] = make(map[string]struct{})
}

View File

@@ -1,80 +0,0 @@
package zabbixconnector
import (
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="zabbixconnector"}`)
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="zabbixconnector"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="zabbixconnector"}`)
)
// InsertHandlerForHTTP processes remote write for ZabbixConnector POST /zabbixconnector/v1/history request.
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)
if err != nil {
return err
}
encoding := req.Header.Get("Content-Encoding")
return stream.Parse(req.Body, encoding, func(rows []zabbixconnector.Row) error {
return insertRows(at, rows, extraLabels)
})
}
func insertRows(at *auth.Token, rows []zabbixconnector.Row, extraLabels []prompb.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
rowsTotal := len(rows)
tssDst := ctx.WriteRequest.Timeseries[:0]
labels := ctx.Labels[:0]
samples := ctx.Samples[:0]
for i := range rows {
r := &rows[i]
labelsLen := len(labels)
for j := range r.Tags {
tag := &r.Tags[j]
labels = append(labels, prompb.Label{
Name: bytesutil.ToUnsafeString(tag.Key),
Value: bytesutil.ToUnsafeString(tag.Value),
})
}
labels = append(labels, extraLabels...)
samplesLen := len(samples)
samples = append(samples, prompb.Sample{
Value: r.Value,
Timestamp: r.Timestamp,
})
tssDst = append(tssDst, prompb.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[samplesLen:],
})
}
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
rowsInserted.Add(rowsTotal)
if at != nil {
rowsTenantInserted.Get(at).Add(rowsTotal)
}
rowsPerInsert.Update(float64(rowsTotal))
return nil
}

View File

@@ -41,7 +41,7 @@ func TestParseInputValue_Success(t *testing.T) {
if len(outputExpected) != len(output) {
t.Fatalf("unexpected output length; got %d; want %d", len(outputExpected), len(output))
}
for i := range outputExpected {
for i := 0; i < len(outputExpected); i++ {
if outputExpected[i].Omitted != output[i].Omitted {
t.Fatalf("unexpected Omitted field in the output\ngot\n%v\nwant\n%v", output, outputExpected)
}

View File

@@ -4,7 +4,6 @@ import (
"context"
"flag"
"fmt"
"maps"
"net"
"net/http"
"net/http/httptest"
@@ -13,7 +12,6 @@ import (
"os/signal"
"path/filepath"
"reflect"
"slices"
"sort"
"strings"
"syscall"
@@ -350,7 +348,9 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
for k := range alertEvalTimesMap {
alertEvalTimes = append(alertEvalTimes, k)
}
slices.Sort(alertEvalTimes)
sort.Slice(alertEvalTimes, func(i, j int) bool {
return alertEvalTimes[i] < alertEvalTimes[j]
})
// sort group eval order according to the given "group_eval_order".
sort.Slice(testGroups, func(i, j int) bool {
@@ -361,8 +361,12 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
var groups []*rule.Group
for _, group := range testGroups {
mergedExternalLabels := make(map[string]string)
maps.Copy(mergedExternalLabels, tg.ExternalLabels)
maps.Copy(mergedExternalLabels, externalLabels)
for k, v := range tg.ExternalLabels {
mergedExternalLabels[k] = v
}
for k, v := range externalLabels {
mergedExternalLabels[k] = v
}
ng := rule.NewGroup(group, q, time.Minute, mergedExternalLabels)
ng.Init()
groups = append(groups, ng)

View File

@@ -116,7 +116,7 @@ func TestParse_Failure(t *testing.T) {
f([]string{"testdata/rules/rules_interval_bad.rules"}, "eval_offset should be smaller than interval")
f([]string{"testdata/rules/rules0-bad.rules"}, "unexpected token")
f([]string{"testdata/dir/rules0-bad.rules"}, "invalid annotations")
f([]string{"testdata/dir/rules0-bad.rules"}, "error parsing annotation")
f([]string{"testdata/dir/rules1-bad.rules"}, "duplicate in file")
f([]string{"testdata/dir/rules2-bad.rules"}, "function \"unknown\" not defined")
f([]string{"testdata/dir/rules3-bad.rules"}, "either `record` or `alert` must be set")
@@ -343,6 +343,7 @@ func TestGroupValidate_Failure(t *testing.T) {
},
},
}, true, "bad prometheus expr")
}
func TestGroupValidate_Success(t *testing.T) {

View File

@@ -2,7 +2,6 @@ package config
import (
"fmt"
"slices"
"strings"
"github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage"
@@ -77,12 +76,13 @@ func (t *Type) ValidateExpr(expr string) error {
if err != nil {
return fmt.Errorf("bad LogsQL expr: %q, err: %w", expr, err)
}
labels, err := q.GetStatsLabels()
if err != nil {
return fmt.Errorf("cannot obtain labels from LogsQL expr: %q, err: %w", expr, err)
}
if slices.Contains(labels, "_time") {
return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr)
fields, _ := q.GetStatsByFields()
for i := range fields {
// VictoriaLogs inserts `_time` field as a label in result when query with `stats by (_time:step)`,
// making the result meaningless and may lead to cardinality issues.
if fields[i] == "_time" {
return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr)
}
}
default:
return fmt.Errorf("unknown datasource type=%q", t.Name)

View File

@@ -5,7 +5,6 @@ import (
"errors"
"fmt"
"io"
"maps"
"net/http"
"net/url"
"strings"
@@ -92,7 +91,9 @@ func (c *Client) Clone() *Client {
ns.extraHeaders = make([]keyValue, len(c.extraHeaders))
copy(ns.extraHeaders, c.extraHeaders)
}
maps.Copy(ns.extraParams, c.extraParams)
for k, v := range c.extraParams {
ns.extraParams[k] = v
}
return ns
}

View File

@@ -34,7 +34,7 @@ type promResponse struct {
// Stats supported by VictoriaMetrics since v1.90
Stats struct {
SeriesFetched *string `json:"seriesFetched,omitempty"`
} `json:"stats"`
} `json:"stats,omitempty"`
// IsPartial supported by VictoriaMetrics
IsPartial *bool `json:"isPartial,omitempty"`
}

View File

@@ -134,7 +134,7 @@ func (ls Labels) String() string {
func LabelCompare(a, b Labels) int {
l := min(len(b), len(a))
for i := range l {
for i := 0; i < l; i++ {
if a[i].Name != b[i].Name {
if a[i].Name < b[i].Name {
return -1

View File

@@ -13,7 +13,7 @@ func BenchmarkPromInstantUnmarshal(b *testing.B) {
// BenchmarkParsePrometheusResponse/Instant_std+fastjson-10 1760 668959 ns/op 280147 B/op 5781 allocs/op
b.Run("Instant std+fastjson", func(b *testing.B) {
for range b.N {
for i := 0; i < b.N; i++ {
var pi promInstant
err = pi.Unmarshal(data)
if err != nil {

View File

@@ -81,7 +81,9 @@ absolute path to all .tpl files in root.
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.")
)
var extURL *url.URL
var (
extURL *url.URL
)
func main() {
// Write flags and help message to stdout, since it is easier to grep or pipe.
@@ -159,7 +161,7 @@ func main() {
ctx, cancel := context.WithCancel(context.Background())
manager, err := newManager(ctx)
if err != nil {
logger.Fatalf("failed to create manager: %s", err)
logger.Fatalf("failed to init: %s", err)
}
logger.Infof("reading rules configuration file from %q", strings.Join(*rulePath, ";"))
groupsCfg, err := config.Parse(*rulePath, validateTplFn, *validateExpressions)

View File

@@ -3,7 +3,6 @@ package main
import (
"context"
"fmt"
"strconv"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
@@ -46,15 +45,13 @@ func (m *manager) ruleAPI(gID, rID uint64) (rule.ApiRule, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
group, ok := m.groups[gID]
g, ok := m.groups[gID]
if !ok {
return rule.ApiRule{}, fmt.Errorf("can't find group with id %d", gID)
}
g := group.ToAPI()
ruleID := strconv.FormatUint(rID, 10)
for _, r := range g.Rules {
if r.ID == ruleID {
return r, nil
if r.ID() == rID {
return r.ToAPI(), nil
}
}
return rule.ApiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
@@ -65,20 +62,17 @@ func (m *manager) alertAPI(gID, aID uint64) (*rule.ApiAlert, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
group, ok := m.groups[gID]
g, ok := m.groups[gID]
if !ok {
return nil, fmt.Errorf("can't find group with id %d", gID)
}
g := group.ToAPI()
for _, r := range g.Rules {
if r.Type != rule.TypeAlerting {
ar, ok := r.(*rule.AlertingRule)
if !ok {
continue
}
alertID := strconv.FormatUint(aID, 10)
for _, a := range r.Alerts {
if a.ID == alertID {
return a, nil
}
if apiAlert := ar.AlertToAPI(aID); apiAlert != nil {
return apiAlert, nil
}
}
return nil, fmt.Errorf("can't find alert with id %d in group %q", aID, g.Name)

View File

@@ -65,11 +65,13 @@ func TestManagerUpdateConcurrent(t *testing.T) {
const workers = 500
const iterations = 10
var wg sync.WaitGroup
for n := range workers {
wg.Go(func() {
wg := sync.WaitGroup{}
wg.Add(workers)
for i := 0; i < workers; i++ {
go func(n int) {
defer wg.Done()
r := rand.New(rand.NewSource(int64(n)))
for range iterations {
for i := 0; i < iterations; i++ {
rnd := r.Intn(len(paths))
cfg, err := config.Parse([]string{paths[rnd]}, notifier.ValidateTemplates, true)
if err != nil { // update can fail and this is expected
@@ -77,7 +79,7 @@ func TestManagerUpdateConcurrent(t *testing.T) {
}
_ = m.update(context.Background(), cfg, false)
}
})
}(i)
}
wg.Wait()
}
@@ -259,7 +261,7 @@ func compareGroups(t *testing.T, a, b *rule.Group) {
for i, r := range a.Rules {
got, want := r, b.Rules[i]
if a.CreateID() != b.CreateID() {
t.Fatalf("expected to have rule %d; got %d", want.ID(), got.ID())
t.Fatalf("expected to have rule %q; got %q", want.ID(), got.ID())
}
if err := rule.CompareRules(t, want, got); err != nil {
t.Fatalf("comparison error: %s", err)

View File

@@ -80,15 +80,14 @@ func (as AlertState) String() string {
// AlertTplData is used to execute templating
type AlertTplData struct {
Type string
Labels map[string]string
Value float64
Expr string
AlertID uint64
GroupID uint64
ActiveAt time.Time
For time.Duration
IsPartial bool
Type string
Labels map[string]string
Value float64
Expr string
AlertID uint64
GroupID uint64
ActiveAt time.Time
For time.Duration
}
var tplHeaders = []string{
@@ -102,7 +101,6 @@ var tplHeaders = []string{
"{{ $groupID := .GroupID }}",
"{{ $activeAt := .ActiveAt }}",
"{{ $for := .For }}",
"{{ $isPartial := .IsPartial }}",
}
// ExecTemplate executes the Alert template for given
@@ -168,8 +166,8 @@ func templateAnnotations(annotations map[string]string, data AlertTplData, tmpl
ctmpl, _ := tmpl.Clone()
ctmpl = ctmpl.Option("missingkey=zero")
if err := templateAnnotation(&buf, builder.String(), tData, ctmpl, execute); err != nil {
r[key] = err.Error()
eg.Add(fmt.Errorf("(key: %q, value: %q): %w", key, text, err))
r[key] = text
eg.Add(fmt.Errorf("key %q, template %q: %w", key, text, err))
continue
}
r[key] = buf.String()
@@ -186,13 +184,13 @@ type tplData struct {
func templateAnnotation(dst io.Writer, text string, data tplData, tpl *textTpl.Template, execute bool) error {
tpl, err := tpl.Parse(text)
if err != nil {
return fmt.Errorf("error parsing template: %w", err)
return fmt.Errorf("error parsing annotation template: %w", err)
}
if !execute {
return nil
}
if err = tpl.Execute(dst, data); err != nil {
return fmt.Errorf("error evaluating template: %w", err)
return fmt.Errorf("error evaluating annotation template: %w", err)
}
return nil
}

View File

@@ -3,7 +3,6 @@ package notifier
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"net/http"
@@ -14,6 +13,7 @@ import (
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
@@ -86,11 +86,6 @@ func (am *AlertManager) Send(ctx context.Context, alerts []Alert, alertLabels []
err := am.send(ctx, alerts, alertLabels, headers)
am.metrics.alertsSendDuration.UpdateDuration(startTime)
if err != nil {
// the context can be cancelled on graceful shutdown
// or on group update. So no need to handle the error as usual.
if errors.Is(err, context.Canceled) {
return nil
}
am.metrics.alertsSendErrors.Add(len(alerts))
am.lastError = err.Error()
} else {
@@ -171,6 +166,11 @@ const alertManagerPath = "/api/v2/alerts"
func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg promauth.HTTPClientConfig,
relabelCfg *promrelabel.ParsedConfigs, timeout time.Duration,
) (*AlertManager, error) {
if err := httputil.CheckURL(alertManagerURL); err != nil {
return nil, fmt.Errorf("invalid alertmanager URL: %w", err)
}
tls := &promauth.TLSConfig{}
if authCfg.TLSConfig != nil {
tls = authCfg.TLSConfig

View File

@@ -212,16 +212,18 @@ consul_sd_configs:
const workers = 500
const iterations = 10
var wg sync.WaitGroup
for n := range workers {
wg.Go(func() {
wg := sync.WaitGroup{}
wg.Add(workers)
for i := 0; i < workers; i++ {
go func(n int) {
defer wg.Done()
r := rand.New(rand.NewSource(int64(n)))
for range iterations {
for i := 0; i < iterations; i++ {
rnd := r.Intn(len(paths))
_ = cw.reload(paths[rnd]) // update can fail and this is expected
_ = cw.notifiers()
}
})
}(i)
}
wg.Wait()
}

View File

@@ -11,8 +11,8 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
@@ -229,9 +229,6 @@ func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
Headers: []string{headers.GetOptionalArg(i)},
}
if err := httputil.CheckURL(addr); err != nil {
return nil, fmt.Errorf("invalid notifier.url %q: %w", addr, err)
}
addr = strings.TrimSuffix(addr, "/")
am, err := NewAlertManager(addr+alertManagerPath, gen, authCfg, nil, sendTimeout.GetOptionalArg(i))
if err != nil {
@@ -269,7 +266,7 @@ func GetTargets() map[TargetType][]Target {
if getActiveNotifiers == nil {
return nil
}
targets := make(map[TargetType][]Target)
var targets = make(map[TargetType][]Target)
// use cached targets from configWatcher instead of getActiveNotifiers for the extra target labels
if cw != nil {
cw.targetsMu.RLock()
@@ -290,7 +287,7 @@ func GetTargets() map[TargetType][]Target {
}
// Send sends alerts to all active notifiers
func Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) chan error {
func Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) *vmalertutil.ErrGroup {
alertsToSend := make([]Alert, 0, len(alerts))
lblss := make([][]prompb.Label, 0, len(alerts))
// apply global relabel config first without modifying original alerts in alerts
@@ -303,18 +300,17 @@ func Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string
lblss = append(lblss, lbls)
}
errGr := new(vmalertutil.ErrGroup)
wg := sync.WaitGroup{}
activeNotifiers := getActiveNotifiers()
errCh := make(chan error, len(activeNotifiers))
defer close(errCh)
for i := range activeNotifiers {
nt := activeNotifiers[i]
wg.Go(func() {
if err := nt.Send(ctx, alertsToSend, lblss, notifierHeaders); err != nil {
errCh <- fmt.Errorf("failed to send alerts to addr %q: %w", nt.Addr(), err)
errGr.Add(fmt.Errorf("failed to send alerts to addr %q: %w", nt.Addr(), err))
}
})
}
wg.Wait()
return errCh
return errGr
}

View File

@@ -55,9 +55,9 @@ func TestInitNegative(t *testing.T) {
*blackHole = oldBlackHole
}()
f := func(path string, addr []string, bh bool) {
f := func(path, addr string, bh bool) {
*configPath = path
*addrs = flagutil.ArrayString(addr)
*addrs = flagutil.ArrayString{addr}
*blackHole = bh
if err := Init(nil, ""); err == nil {
t.Fatalf("expected to get error; got nil instead")
@@ -65,12 +65,9 @@ func TestInitNegative(t *testing.T) {
}
// *configPath, *addrs and *blackhole are mutually exclusive
f("/dummy/path", []string{"127.0.0.1"}, false)
f("/dummy/path", []string{}, true)
f("", []string{"127.0.0.1"}, true)
// addr cannot be ""
f("", []string{""}, false)
f("", []string{"127.0.0.1", ""}, false)
f("/dummy/path", "127.0.0.1", false)
f("/dummy/path", "", true)
f("", "127.0.0.1", true)
}
func TestBlackHole(t *testing.T) {
@@ -205,9 +202,7 @@ alert_relabel_configs:
},
}
errG := Send(context.Background(), firingAlerts, nil)
for err := range errG {
if err != nil {
t.Errorf("unexpected error when sending alerts: %s", err)
}
if errG.Err() != nil {
t.Fatalf("unexpected error when sending alerts: %s", err)
}
}

View File

@@ -113,7 +113,7 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
input: make(chan prompb.TimeSeries, cfg.MaxQueueSize),
}
for range cc {
for i := 0; i < cc; i++ {
c.run(ctx)
}
return c, nil
@@ -238,10 +238,8 @@ func (c *Client) flush(ctx context.Context, wr *prompb.WriteRequest) {
defer func() {
sendDuration.Add(time.Since(timeStart).Seconds())
}()
attempts := 0
L:
for {
for attempts := 0; ; attempts++ {
err := c.send(ctx, b)
if err != nil && (errors.Is(err, io.EOF) || netutil.IsTrivialNetworkError(err)) {
// Something in the middle between client and destination might be closing
@@ -283,7 +281,6 @@ L:
time.Sleep(retryInterval)
retryInterval *= 2
attempts++
}
rwErrors.Inc()

View File

@@ -44,7 +44,7 @@ func TestClient_Push(t *testing.T) {
r := rand.New(rand.NewSource(1))
const rowsN = int(1e4)
for range rowsN {
for i := 0; i < rowsN; i++ {
s := prompb.TimeSeries{
Samples: []prompb.Sample{{
Value: r.Float64(),
@@ -102,7 +102,7 @@ func TestClient_run_maxBatchSizeDuringShutdown(t *testing.T) {
}
// push time series to the client.
for range pushCnt {
for i := 0; i < pushCnt; i++ {
if err = rwClient.Push(prompb.TimeSeries{}); err != nil {
t.Fatalf("cannot time series to the client: %s", err)
}

View File

@@ -22,7 +22,7 @@ func TestDebugClient_Push(t *testing.T) {
const rowsN = 100
var sent int
for i := range rowsN {
for i := 0; i < rowsN; i++ {
s := prompb.TimeSeries{
Samples: []prompb.Sample{{
Value: float64(i),

View File

@@ -2,7 +2,6 @@ package rule
import (
"context"
"errors"
"fmt"
"hash/fnv"
"math"
@@ -247,6 +246,16 @@ func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
return alerts
}
// GetAlert returns alert if id exists
func (ar *AlertingRule) GetAlert(id uint64) *notifier.Alert {
ar.alertsMu.RLock()
defer ar.alertsMu.RUnlock()
if ar.alerts == nil {
return nil
}
return ar.alerts[id]
}
func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string, args ...any) {
if !ar.Debug {
return
@@ -312,11 +321,6 @@ type labelSet struct {
// On k conflicts in origin set, the original value is preferred and copied
// to processed with `exported_%k` key. The copy happens only if passed v isn't equal to origin[k] value.
func (ls *labelSet) add(k, v string) {
// do not add label with empty value, since it has no meaning.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984
if v == "" {
return
}
ls.processed[k] = v
ov, ok := ls.origin[k]
if !ok {
@@ -346,13 +350,14 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
ls.processed[l.Name] = l.Value
}
// labels only support limited templating variables,
// including `labels`, `value` and `expr`, to avoid breaking alert states or causing cardinality issue with results
extraLabels, err := notifier.ExecTemplate(qFn, ar.Labels, notifier.AlertTplData{
Labels: ls.origin,
Value: m.Values[0],
Expr: ar.Expr,
})
if err != nil {
return nil, fmt.Errorf("failed to expand labels: %w", err)
}
for k, v := range extraLabels {
ls.add(k, v)
}
@@ -363,7 +368,7 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
if !*disableAlertGroupLabel && ar.GroupName != "" {
ls.add(alertGroupNameLabel, ar.GroupName)
}
return ls, err
return ls, nil
}
// execRange executes alerting rule on the given time range similarly to exec.
@@ -389,7 +394,11 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
return nil, err
}
alertID := hash(ls.processed)
a := ar.newAlert(s, time.Time{}, ls.processed, nil) // initial alert
as, err := ar.expandAnnotationTemplates(s, qFn, time.Time{}, ls)
if err != nil {
return nil, err
}
a := ar.newAlert(s, time.Time{}, ls.processed, as) // initial alert
prevT := time.Time{}
for i := range s.Values {
@@ -405,6 +414,8 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
// reset to Pending if there are gaps > EvalInterval between DPs
a.State = notifier.StatePending
a.ActiveAt = at
// re-template the annotations as active timestamp is changed
a.Annotations, _ = ar.expandAnnotationTemplates(s, qFn, at, ls)
a.Start = time.Time{}
} else if at.Sub(a.ActiveAt) >= ar.For && a.State != notifier.StateFiring {
a.State = notifier.StateFiring
@@ -450,7 +461,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
defer func() {
ar.state.add(curState)
if curState.Err != nil && !errors.Is(curState.Err, context.Canceled) {
if curState.Err != nil {
ar.metrics.errors.Inc()
}
}()
@@ -459,8 +470,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err)
}
isPartial := isPartialResponse(res)
ar.logDebugf(ts, nil, "query returned %d series (elapsed: %s, isPartial: %t)", curState.Samples, curState.Duration, isPartial)
ar.logDebugf(ts, nil, "query returned %d series (elapsed: %s, isPartial: %t)", curState.Samples, curState.Duration, isPartialResponse(res))
qFn := func(query string) ([]datasource.Metric, error) {
res, _, err := ar.q.Query(ctx, query, ts)
return res.Data, err
@@ -474,9 +484,8 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
for i, m := range res.Data {
ls, err := ar.expandLabelTemplates(m, qFn)
if err != nil {
// only set error in current state, but do not break alert processing
curState.Err = err
logger.Errorf("got templating error in rule %s: %q", ar.Name, err)
return nil, curState.Err
}
at := ts
alertID := hash(ls.processed)
@@ -486,11 +495,10 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
at = a.ActiveAt
}
}
as, err := ar.expandAnnotationTemplates(m, qFn, at, ls, isPartial)
as, err := ar.expandAnnotationTemplates(m, qFn, at, ls)
if err != nil {
// only set error in current state, but do not break alert processing
curState.Err = err
logger.Errorf("got templating error in rule %s: %q", ar.Name, err)
return nil, curState.Err
}
expandedLabels[i] = ls
expandedAnnotations[i] = as
@@ -599,26 +607,25 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
func (ar *AlertingRule) expandLabelTemplates(m datasource.Metric, qFn templates.QueryFn) (*labelSet, error) {
ls, err := ar.toLabels(m, qFn)
if err != nil {
return ls, fmt.Errorf("failed to expand label templates: %s", err)
return nil, fmt.Errorf("failed to expand label templates: %s", err)
}
return ls, nil
}
func (ar *AlertingRule) expandAnnotationTemplates(m datasource.Metric, qFn templates.QueryFn, activeAt time.Time, ls *labelSet, isPartial bool) (map[string]string, error) {
func (ar *AlertingRule) expandAnnotationTemplates(m datasource.Metric, qFn templates.QueryFn, activeAt time.Time, ls *labelSet) (map[string]string, error) {
tplData := notifier.AlertTplData{
Value: m.Values[0],
Type: ar.Type.String(),
Labels: ls.origin,
Expr: ar.Expr,
AlertID: hash(ls.processed),
GroupID: ar.GroupID,
ActiveAt: activeAt,
For: ar.For,
IsPartial: isPartial,
Value: m.Values[0],
Type: ar.Type.String(),
Labels: ls.origin,
Expr: ar.Expr,
AlertID: hash(ls.processed),
GroupID: ar.GroupID,
ActiveAt: activeAt,
For: ar.For,
}
as, err := notifier.ExecTemplate(qFn, ar.Annotations, tplData)
if err != nil {
return as, fmt.Errorf("failed to expand annotation templates: %s", err)
return nil, fmt.Errorf("failed to expand annotation templates: %s", err)
}
return as, nil
}
@@ -818,9 +825,7 @@ func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts ti
expr := fmt.Sprintf("default_rollup(%s{%s%s}[%ds])",
alertForStateMetricName, nameStr, labelsFilter, int(lookback.Seconds()))
// query ALERTS_FOR_STATE at `ts-1s` instead `ts` to avoid retrieving data written in the current run,
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10335
res, _, err := q.Query(ctx, expr, ts.Add(-1*time.Second))
res, _, err := q.Query(ctx, expr, ts)
if err != nil {
return fmt.Errorf("failed to execute restore query %q: %w ", expr, err)
}

View File

@@ -1,106 +0,0 @@
//go:build synctest
package rule
import (
"context"
"strings"
"testing"
"testing/synctest"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
)
// TestAlertingRule_ActiveAtPreservedInAnnotations ensures that the fix for
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9543 is preserved
// while allowing query templates in labels (https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9783)
func TestAlertingRule_ActiveAtPreservedInAnnotations(t *testing.T) {
// wrap into synctest because of time manipulations
synctest.Test(t, func(t *testing.T) {
fq := &datasource.FakeQuerier{}
ar := &AlertingRule{
Name: "TestActiveAtPreservation",
Labels: map[string]string{
"test_query_in_label": `{{ "static_value" }}`,
},
Annotations: map[string]string{
"description": "Alert active since {{ $activeAt }}",
},
alerts: make(map[uint64]*notifier.Alert),
q: fq,
state: &ruleState{
entries: make([]StateEntry, 10),
},
}
// Mock query result - return empty result to make suppress_for_mass_alert = false
// (no need to add anything to fq for empty result)
// Add a metric that should trigger the alert
fq.Add(metricWithValueAndLabels(t, 1, "instance", "server1"))
// First execution - creates new alert
ts1 := time.Now()
_, err := ar.exec(context.TODO(), ts1, 0)
if err != nil {
t.Fatalf("unexpected error on first exec: %s", err)
}
if len(ar.alerts) != 1 {
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
}
firstAlert := ar.GetAlerts()[0]
// Verify first execution: activeAt should be ts1 and annotation should reflect it
if !firstAlert.ActiveAt.Equal(ts1) {
t.Fatalf("expected activeAt to be %v, got %v", ts1, firstAlert.ActiveAt)
}
// Extract time from annotation (format will be like "Alert active since 2025-09-30 08:55:13.638551611 -0400 EDT m=+0.002928464")
expectedTimeStr := ts1.Format("2006-01-02 15:04:05")
if !strings.Contains(firstAlert.Annotations["description"], expectedTimeStr) {
t.Fatalf("first exec annotation should contain time %s, got: %s", expectedTimeStr, firstAlert.Annotations["description"])
}
// Second execution - should preserve activeAt in annotation
// Ensure different timestamp with different seconds
// sleep is non-blocking thanks to synctest
time.Sleep(2 * time.Second)
ts2 := time.Now()
_, err = ar.exec(context.TODO(), ts2, 0)
if err != nil {
t.Fatalf("unexpected error on second exec: %s", err)
}
// Get the alert again (should be the same alert)
if len(ar.alerts) != 1 {
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
}
secondAlert := ar.GetAlerts()[0]
// Critical test: activeAt should still be ts1, not ts2
if !secondAlert.ActiveAt.Equal(ts1) {
t.Fatalf("activeAt should be preserved as %v, but got %v", ts1, secondAlert.ActiveAt)
}
// Critical test: annotation should still contain ts1 time, not ts2
if !strings.Contains(secondAlert.Annotations["description"], expectedTimeStr) {
t.Fatalf("second exec annotation should still contain original time %s, got: %s", expectedTimeStr, secondAlert.Annotations["description"])
}
// Additional verification: annotation should NOT contain ts2 time
ts2TimeStr := ts2.Format("2006-01-02 15:04:05")
if strings.Contains(secondAlert.Annotations["description"], ts2TimeStr) {
t.Fatalf("annotation should NOT contain new eval time %s, got: %s", ts2TimeStr, secondAlert.Annotations["description"])
}
// Verify query template in labels still works (this would fail if query templates were broken)
if firstAlert.Labels["test_query_in_label"] != "static_value" {
t.Fatalf("expected test_query_in_label=static_value, got %s", firstAlert.Labels["test_query_in_label"])
}
})
}

View File

@@ -10,6 +10,7 @@ import (
"strings"
"sync"
"testing"
"testing/synctest"
"time"
"github.com/VictoriaMetrics/metrics"
@@ -663,7 +664,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
Name: "for-pending",
Type: config.NewPrometheusType().String(),
Labels: map[string]string{"alertname": "for-pending"},
Annotations: map[string]string{},
Annotations: map[string]string{"activeAt": "5000"},
State: notifier.StatePending,
ActiveAt: time.Unix(5, 0),
Value: 1,
@@ -683,7 +684,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
Name: "for-firing",
Type: config.NewPrometheusType().String(),
Labels: map[string]string{"alertname": "for-firing"},
Annotations: map[string]string{},
Annotations: map[string]string{"activeAt": "1000"},
State: notifier.StateFiring,
ActiveAt: time.Unix(1, 0),
Start: time.Unix(5, 0),
@@ -704,7 +705,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
Name: "for-hold-pending",
Type: config.NewPrometheusType().String(),
Labels: map[string]string{"alertname": "for-hold-pending"},
Annotations: map[string]string{},
Annotations: map[string]string{"activeAt": "5000"},
State: notifier.StatePending,
ActiveAt: time.Unix(5, 0),
Value: 1,
@@ -1119,7 +1120,7 @@ func TestAlertingRuleLimit_Success(t *testing.T) {
}
func TestAlertingRule_Template(t *testing.T) {
f := func(rule *AlertingRule, metrics []datasource.Metric, isResponsePartial bool, alertsExpected map[uint64]*notifier.Alert) {
f := func(rule *AlertingRule, metrics []datasource.Metric, alertsExpected map[uint64]*notifier.Alert) {
t.Helper()
fakeGroup := Group{
@@ -1132,7 +1133,6 @@ func TestAlertingRule_Template(t *testing.T) {
entries: make([]StateEntry, 10),
}
fq.Add(metrics...)
fq.SetPartialResponse(isResponsePartial)
if _, err := rule.exec(context.TODO(), time.Now(), 0); err != nil {
t.Fatalf("unexpected error: %s", err)
@@ -1163,7 +1163,7 @@ func TestAlertingRule_Template(t *testing.T) {
}, []datasource.Metric{
metricWithValueAndLabels(t, 1, "instance", "foo"),
metricWithValueAndLabels(t, 1, "instance", "bar"),
}, false, map[uint64]*notifier.Alert{
}, map[uint64]*notifier.Alert{
hash(map[string]string{alertNameLabel: "common", "region": "east", "instance": "foo"}): {
Annotations: map[string]string{
"summary": `common: Too high connection number for "foo"`,
@@ -1192,14 +1192,14 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "{{ $labels.instance }}",
},
Annotations: map[string]string{
"summary": `{{ $labels.__name__ }}: Too high connection number for "{{ $labels.instance }}".{{ if $isPartial }} WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.{{ end }}`,
"summary": `{{ $labels.__name__ }}: Too high connection number for "{{ $labels.instance }}"`,
"description": `{{ $labels.alertname}}: It is {{ $value }} connections for "{{ $labels.instance }}"`,
},
alerts: make(map[uint64]*notifier.Alert),
}, []datasource.Metric{
metricWithValueAndLabels(t, 2, "__name__", "first", "instance", "foo", alertNameLabel, "override"),
metricWithValueAndLabels(t, 10, "__name__", "second", "instance", "bar", alertNameLabel, "override"),
}, false, map[uint64]*notifier.Alert{
}, map[uint64]*notifier.Alert{
hash(map[string]string{alertNameLabel: "override label", "exported_alertname": "override", "instance": "foo"}): {
Labels: map[string]string{
alertNameLabel: "override label",
@@ -1207,7 +1207,7 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "foo",
},
Annotations: map[string]string{
"summary": `first: Too high connection number for "foo".`,
"summary": `first: Too high connection number for "foo"`,
"description": `override: It is 2 connections for "foo"`,
},
},
@@ -1218,7 +1218,7 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "bar",
},
Annotations: map[string]string{
"summary": `second: Too high connection number for "bar".`,
"summary": `second: Too high connection number for "bar"`,
"description": `override: It is 10 connections for "bar"`,
},
},
@@ -1231,7 +1231,7 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "{{ $labels.instance }}",
},
Annotations: map[string]string{
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}.{{ if $isPartial }} WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.{{ end }}`,
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}`,
},
alerts: make(map[uint64]*notifier.Alert),
}, []datasource.Metric{
@@ -1239,7 +1239,7 @@ func TestAlertingRule_Template(t *testing.T) {
alertNameLabel, "originAlertname",
alertGroupNameLabel, "originGroupname",
"instance", "foo"),
}, true, map[uint64]*notifier.Alert{
}, map[uint64]*notifier.Alert{
hash(map[string]string{
alertNameLabel: "OriginLabels",
"exported_alertname": "originAlertname",
@@ -1255,7 +1255,7 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "foo",
},
Annotations: map[string]string{
"summary": `Alert "originAlertname(originGroupname)" for instance foo. WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.`,
"summary": `Alert "originAlertname(originGroupname)" for instance foo`,
},
},
})
@@ -1370,10 +1370,8 @@ func TestAlertingRule_ToLabels(t *testing.T) {
ar := &AlertingRule{
Labels: map[string]string{
"instance": "override", // this should override instance with new value
"group": "vmalert", // this shouldn't have effect since value in metric is equal
"invalid_label": "{{ .Values.mustRuntimeFail }}",
"empty_label": "", // this should be dropped
"instance": "override", // this should override instance with new value
"group": "vmalert", // this shouldn't have effect since value in metric is equal
},
Expr: "sum(vmalert_alerting_rules_error) by(instance, group, alertname) > 0",
Name: "AlertingRulesError",
@@ -1381,11 +1379,10 @@ func TestAlertingRule_ToLabels(t *testing.T) {
}
expectedOriginLabels := map[string]string{
"instance": "0.0.0.0:8800",
"group": "vmalert",
"alertname": "ConfigurationReloadFailure",
"alertgroup": "vmalert",
"invalid_label": `error evaluating template: template: :1:298: executing "" at <.Values.mustRuntimeFail>: can't evaluate field Values in type notifier.tplData`,
"instance": "0.0.0.0:8800",
"group": "vmalert",
"alertname": "ConfigurationReloadFailure",
"alertgroup": "vmalert",
}
expectedProcessedLabels := map[string]string{
@@ -1395,12 +1392,11 @@ func TestAlertingRule_ToLabels(t *testing.T) {
"exported_alertname": "ConfigurationReloadFailure",
"group": "vmalert",
"alertgroup": "vmalert",
"invalid_label": `error evaluating template: template: :1:298: executing "" at <.Values.mustRuntimeFail>: can't evaluate field Values in type notifier.tplData`,
}
ls, err := ar.toLabels(metric, nil)
if err == nil || !strings.Contains(err.Error(), "error evaluating template") {
t.Fatalf("unexpected error %q", err.Error())
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if !reflect.DeepEqual(ls.origin, expectedOriginLabels) {
@@ -1478,3 +1474,95 @@ func TestAlertingRule_QueryTemplateInLabels(t *testing.T) {
t.Fatalf("expected 'suppress_for_mass_alert' label to be 'true' or 'false', got '%s'", suppressLabel)
}
}
// TestAlertingRule_ActiveAtPreservedInAnnotations ensures that the fix for
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9543 is preserved
// while allowing query templates in labels (https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9783)
func TestAlertingRule_ActiveAtPreservedInAnnotations(t *testing.T) {
// wrap into synctest because of time manipulations
synctest.Test(t, func(t *testing.T) {
fq := &datasource.FakeQuerier{}
ar := &AlertingRule{
Name: "TestActiveAtPreservation",
Labels: map[string]string{
"test_query_in_label": `{{ "static_value" }}`,
},
Annotations: map[string]string{
"description": "Alert active since {{ $activeAt }}",
},
alerts: make(map[uint64]*notifier.Alert),
q: fq,
state: &ruleState{
entries: make([]StateEntry, 10),
},
}
// Mock query result - return empty result to make suppress_for_mass_alert = false
// (no need to add anything to fq for empty result)
// Add a metric that should trigger the alert
fq.Add(metricWithValueAndLabels(t, 1, "instance", "server1"))
// First execution - creates new alert
ts1 := time.Now()
_, err := ar.exec(context.TODO(), ts1, 0)
if err != nil {
t.Fatalf("unexpected error on first exec: %s", err)
}
if len(ar.alerts) != 1 {
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
}
firstAlert := ar.GetAlerts()[0]
// Verify first execution: activeAt should be ts1 and annotation should reflect it
if !firstAlert.ActiveAt.Equal(ts1) {
t.Fatalf("expected activeAt to be %v, got %v", ts1, firstAlert.ActiveAt)
}
// Extract time from annotation (format will be like "Alert active since 2025-09-30 08:55:13.638551611 -0400 EDT m=+0.002928464")
expectedTimeStr := ts1.Format("2006-01-02 15:04:05")
if !strings.Contains(firstAlert.Annotations["description"], expectedTimeStr) {
t.Fatalf("first exec annotation should contain time %s, got: %s", expectedTimeStr, firstAlert.Annotations["description"])
}
// Second execution - should preserve activeAt in annotation
// Ensure different timestamp with different seconds
// sleep is non-blocking thanks to synctest
time.Sleep(2 * time.Second)
ts2 := time.Now()
_, err = ar.exec(context.TODO(), ts2, 0)
if err != nil {
t.Fatalf("unexpected error on second exec: %s", err)
}
// Get the alert again (should be the same alert)
if len(ar.alerts) != 1 {
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
}
secondAlert := ar.GetAlerts()[0]
// Critical test: activeAt should still be ts1, not ts2
if !secondAlert.ActiveAt.Equal(ts1) {
t.Fatalf("activeAt should be preserved as %v, but got %v", ts1, secondAlert.ActiveAt)
}
// Critical test: annotation should still contain ts1 time, not ts2
if !strings.Contains(secondAlert.Annotations["description"], expectedTimeStr) {
t.Fatalf("second exec annotation should still contain original time %s, got: %s", expectedTimeStr, secondAlert.Annotations["description"])
}
// Additional verification: annotation should NOT contain ts2 time
ts2TimeStr := ts2.Format("2006-01-02 15:04:05")
if strings.Contains(secondAlert.Annotations["description"], ts2TimeStr) {
t.Fatalf("annotation should NOT contain new eval time %s, got: %s", ts2TimeStr, secondAlert.Annotations["description"])
}
// Verify query template in labels still works (this would fail if query templates were broken)
if firstAlert.Labels["test_query_in_label"] != "static_value" {
t.Fatalf("expected test_query_in_label=static_value, got %s", firstAlert.Labels["test_query_in_label"])
}
})
}

View File

@@ -6,7 +6,6 @@ import (
"flag"
"fmt"
"hash/fnv"
"maps"
"net/url"
"sync"
"time"
@@ -19,7 +18,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
)
@@ -31,8 +29,8 @@ var (
"0 means no limit.")
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier.")
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maximum duration for automatic alert expiration, "+
resendDelay = flag.Duration("rule.resendDelay", 0, "MiniMum amount of time to wait before resending an alert to notifier.")
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maxiMum duration for automatic alert expiration, "+
"which by default is 4 times evaluationInterval of the parent group")
evalDelay = flag.Duration("rule.evalDelay", 30*time.Second, "Adjustment of the 'time' parameter for rule evaluation requests to compensate intentional data delay from the datasource. "+
"Normally, should be equal to '-search.latencyOffset' (cmd-line flag configured for VictoriaMetrics single-node or vmselect). "+
@@ -98,7 +96,9 @@ type groupMetrics struct {
// set2 has priority over set1.
func mergeLabels(groupName, ruleName string, set1, set2 map[string]string) map[string]string {
r := map[string]string{}
maps.Copy(r, set1)
for k, v := range set1 {
r[k] = v
}
for k, v := range set2 {
if prevV, ok := r[k]; ok {
logger.Infof("label %q=%q for rule %q.%q overwritten with external label %q=%q",
@@ -374,7 +374,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
g.infof("started")
eval := func(ctx context.Context, ts time.Time) time.Time {
eval := func(ctx context.Context, ts time.Time) {
g.metrics.iterationTotal.Inc()
start := time.Now()
@@ -382,7 +382,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
if len(g.Rules) < 1 {
g.metrics.iterationDuration.UpdateDuration(start)
g.LastEvaluation = start
return ts
return
}
resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration)
@@ -396,7 +396,6 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
}
g.metrics.iterationDuration.UpdateDuration(start)
g.LastEvaluation = start
return ts
}
evalCtx, cancel := context.WithCancel(ctx)
@@ -405,7 +404,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
g.mu.Unlock()
defer g.evalCancel()
realEvalTS := eval(evalCtx, evalTS)
eval(evalCtx, evalTS)
t := time.NewTicker(g.Interval)
defer t.Stop()
@@ -413,7 +412,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
// restore the rules state after the first evaluation
// so only active alerts can be restored.
if rr != nil {
err := g.restore(ctx, rr, realEvalTS, *remoteReadLookBack)
err := g.restore(ctx, rr, evalTS, *remoteReadLookBack)
if err != nil {
logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err)
}
@@ -494,8 +493,11 @@ func (g *Group) delayBeforeStart(ts time.Time, maxDelay time.Duration) time.Dura
}
// otherwise, return a random duration between [0..min(interval, maxDelay)] based on group ID
// artificially limit interval, so groups with big intervals could start sooner.
interval := min(g.Interval, maxDelay)
interval := g.Interval
if interval > maxDelay {
// artificially limit interval, so groups with big intervals could start sooner.
interval = maxDelay
}
var randSleep time.Duration
randSleep = time.Duration(float64(interval) * (float64(g.GetID()) / (1 << 64)))
sleepOffset := time.Duration(ts.UnixNano() % interval.Nanoseconds())
@@ -753,7 +755,6 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
return fmt.Errorf("rule %q: failed to execute: %w", r, err)
}
var errG vmalertutil.ErrGroup
if e.Rw != nil {
pushToRW := func(tss []prompb.TimeSeries) error {
var lastErr error
@@ -765,26 +766,20 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
return lastErr
}
if err := pushToRW(tss); err != nil {
errG.Add(err)
return err
}
}
ar, ok := r.(*AlertingRule)
if !ok {
return errG.Err()
return nil
}
alerts := ar.alertsToSend(resolveDuration, *resendDelay)
if len(alerts) < 1 {
return errG.Err()
return nil
}
notifierErr := notifier.Send(ctx, alerts, e.notifierHeaders)
for err := range notifierErr {
if err != nil {
errG.Add(fmt.Errorf("rule %q: notifier failure: %w", r, err))
}
}
return errG.Err()
errGr := notifier.Send(ctx, alerts, e.notifierHeaders)
return errGr.Err()
}

View File

@@ -405,8 +405,7 @@ func TestGroupStart(t *testing.T) {
var cur uint64
prev := g.metrics.iterationTotal.Get()
i := 0
for {
for i := 0; ; i++ {
if i > 40 {
t.Fatalf("group wasn't able to perform %d evaluations during %d eval intervals", n, i)
}
@@ -415,7 +414,6 @@ func TestGroupStart(t *testing.T) {
return
}
time.Sleep(interval)
i++
}
}

View File

@@ -2,7 +2,6 @@ package rule
import (
"context"
"errors"
"fmt"
"strings"
"time"
@@ -198,7 +197,7 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
defer func() {
rr.state.add(curState)
if curState.Err != nil && !errors.Is(curState.Err, context.Canceled) {
if curState.Err != nil {
rr.metrics.errors.Inc()
}
}()
@@ -237,8 +236,7 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
Labels: stringToLabels(k),
Samples: []prompb.Sample{
{Value: decimal.StaleNaN, Timestamp: ts.UnixNano() / 1e6},
},
})
}})
}
rr.lastEvaluation = curEvaluation
return tss, nil
@@ -293,11 +291,6 @@ func (rr *RecordingRule) toTimeSeries(m datasource.Metric) prompb.TimeSeries {
}
// add extra labels configured by user
for k := range rr.Labels {
// do not add label with empty value, since it has no meaning.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984
if rr.Labels[k] == "" {
continue
}
existingLabel := promrelabel.GetLabelByName(m.Labels, k)
if existingLabel != nil { // there is a conflict between extra and existing label
if existingLabel.Value == rr.Labels[k] {

View File

@@ -121,7 +121,7 @@ func (s *ruleState) add(e StateEntry) {
func replayRule(r Rule, start, end time.Time, rw remotewrite.RWClient, replayRuleRetryAttempts int) (int, error) {
var err error
var tss []prompb.TimeSeries
for i := range replayRuleRetryAttempts {
for i := 0; i < replayRuleRetryAttempts; i++ {
tss, err = r.execRange(context.Background(), start, end)
if err == nil {
break

View File

@@ -40,7 +40,7 @@ func TestRule_state(t *testing.T) {
}
var last time.Time
for range stateEntriesN * 2 {
for i := 0; i < stateEntriesN*2; i++ {
last = time.Now()
r.state.add(StateEntry{At: last})
}
@@ -65,15 +65,17 @@ func TestRule_stateConcurrent(_ *testing.T) {
r := &AlertingRule{state: &ruleState{entries: make([]StateEntry, 20)}}
const workers = 50
const iterations = 100
var wg sync.WaitGroup
for range workers {
wg.Go(func() {
for range iterations {
wg := sync.WaitGroup{}
wg.Add(workers)
for i := 0; i < workers; i++ {
go func() {
defer wg.Done()
for i := 0; i < iterations; i++ {
r.state.add(StateEntry{At: time.Now()})
r.state.getAll()
r.state.getLast()
}
})
}()
}
wg.Wait()
}

View File

@@ -19,13 +19,13 @@ func CompareRules(t *testing.T, a, b Rule) error {
case *AlertingRule:
br, ok := b.(*AlertingRule)
if !ok {
return fmt.Errorf("rule %d supposed to be of type AlertingRule", b.ID())
return fmt.Errorf("rule %q supposed to be of type AlertingRule", b.ID())
}
return compareAlertingRules(t, v, br)
case *RecordingRule:
br, ok := b.(*RecordingRule)
if !ok {
return fmt.Errorf("rule %d supposed to be of type RecordingRule", b.ID())
return fmt.Errorf("rule %q supposed to be of type RecordingRule", b.ID())
}
return compareRecordingRules(t, v, br)
default:

View File

@@ -209,6 +209,15 @@ func (ar *AlertingRule) AlertsToAPI() []*ApiAlert {
return alerts
}
// AlertToAPI generates apiAlert object from alert by its id(hash)
func (ar *AlertingRule) AlertToAPI(id uint64) *ApiAlert {
a := ar.GetAlert(id)
if a == nil {
return nil
}
return NewAlertAPI(ar, a)
}
// NewAlertAPI creates apiAlert for notifier.Alert
func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
aa := &ApiAlert{

View File

@@ -45,7 +45,7 @@ func (eg *ErrGroup) Error() string {
return ""
}
var b strings.Builder
fmt.Fprintf(&b, "errors(%d): \n", len(eg.errs))
fmt.Fprintf(&b, "errors(%d): ", len(eg.errs))
for i, err := range eg.errs {
b.WriteString(err.Error())
if i != len(eg.errs)-1 {

View File

@@ -30,8 +30,8 @@ func TestErrGroup(t *testing.T) {
}
f(nil, "")
f([]error{errors.New("timeout")}, "errors(1): \ntimeout")
f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): \ntimeout\ndeadline")
f([]error{errors.New("timeout")}, "errors(1): timeout")
f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): timeout\ndeadline")
}
// TestErrGroupConcurrent supposed to test concurrent
@@ -42,7 +42,7 @@ func TestErrGroupConcurrent(_ *testing.T) {
const writersN = 4
payload := make(chan error, writersN)
for range writersN {
for i := 0; i < writersN; i++ {
go func() {
for err := range payload {
eg.Add(err)
@@ -51,7 +51,7 @@ func TestErrGroupConcurrent(_ *testing.T) {
}
const iterations = 500
for i := range iterations {
for i := 0; i < iterations; i++ {
payload <- fmt.Errorf("error %d", i)
if i%10 == 0 {
_ = eg.Err()

View File

@@ -412,18 +412,18 @@ func (rh *requestHandler) groupAlerts() []rule.GroupAlerts {
defer rh.m.groupsMu.RUnlock()
var gAlerts []rule.GroupAlerts
for _, group := range rh.m.groups {
for _, g := range rh.m.groups {
var alerts []*rule.ApiAlert
g := group.ToAPI()
for _, r := range g.Rules {
if r.Type != rule.TypeAlerting {
a, ok := r.(*rule.AlertingRule)
if !ok {
continue
}
alerts = append(alerts, r.Alerts...)
alerts = append(alerts, a.AlertsToAPI()...)
}
if len(alerts) > 0 {
gAlerts = append(gAlerts, rule.GroupAlerts{
Group: g,
Group: g.ToAPI(),
Alerts: alerts,
})
}
@@ -444,12 +444,12 @@ func (rh *requestHandler) listAlerts(rf *rulesFilter) ([]byte, error) {
if !rf.matchesGroup(group) {
continue
}
g := group.ToAPI()
for _, r := range g.Rules {
if r.Type != rule.TypeAlerting {
for _, r := range group.Rules {
a, ok := r.(*rule.AlertingRule)
if !ok {
continue
}
lr.Data.Alerts = append(lr.Data.Alerts, r.Alerts...)
lr.Data.Alerts = append(lr.Data.Alerts, a.AlertsToAPI()...)
}
}

View File

@@ -9,7 +9,6 @@
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
) %}
{% func Controls(prefix, currentIcon, currentText string, icons, filters map[string]string, search bool) %}
@@ -79,8 +78,6 @@
{% func Welcome(r *http.Request) %}
{%= tpl.Header(r, navItems, "vmalert", getLastConfigError()) %}
<p>
Version {%s buildinfo.Version %} <br>
API:<br>
{% for _, p := range apiLinks %}
{%code p, doc := p[0], p[1] %}
@@ -605,11 +602,11 @@
<table class="table table-striped table-hover table-sm">
<thead>
<tr>
<th scope="col" title="The time when the rule was executed">Updated at</th>
<th scope="col" title="The time when event was created">Updated at</th>
<th scope="col" class="w-10 text-center" title="How many series expression returns. Each series will represent an alert.">Series returned</th>
{% if seriesFetchedEnabled %}<th scope="col" class="w-10 text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>{% endif %}
<th scope="col" class="w-10 text-center" title="How many seconds request took">Duration</th>
<th scope="col" class="text-center" title="The time used in execution query request">Execution timestamp</th>
<th scope="col" class="text-center" title="Time used for rule execution">Executed at</th>
<th scope="col" class="text-center" title="cURL command with request example">cURL</th>
</tr>
</thead>

File diff suppressed because it is too large Load Diff

View File

@@ -4,7 +4,6 @@ import (
"bytes"
"context"
"encoding/base64"
"errors"
"flag"
"fmt"
"math"
@@ -65,11 +64,10 @@ type AuthConfig struct {
type UserInfo struct {
Name string `yaml:"name,omitempty"`
BearerToken string `yaml:"bearer_token,omitempty"`
JWT *JWTConfig `yaml:"jwt,omitempty"`
AuthToken string `yaml:"auth_token,omitempty"`
Username string `yaml:"username,omitempty"`
Password string `yaml:"password,omitempty"`
BearerToken string `yaml:"bearer_token,omitempty"`
AuthToken string `yaml:"auth_token,omitempty"`
Username string `yaml:"username,omitempty"`
Password string `yaml:"password,omitempty"`
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
DiscoverBackendIPs *bool `yaml:"discover_backend_ips,omitempty"`
@@ -96,8 +94,6 @@ type UserInfo struct {
rt http.RoundTripper
requests *metrics.Counter
requestErrors *metrics.Counter
backendRequests *metrics.Counter
backendErrors *metrics.Counter
requestsDuration *metrics.Summary
}
@@ -109,29 +105,13 @@ type HeadersConf struct {
KeepOriginalHost *bool `yaml:"keep_original_host,omitempty"`
}
func (ui *UserInfo) beginConcurrencyLimit(ctx context.Context) error {
func (ui *UserInfo) beginConcurrencyLimit() error {
select {
case ui.concurrencyLimitCh <- struct{}{}:
return nil
default:
// The number of concurrently executed requests for the given user equals the limt.
// Wait until some of the currently executed requests are finished, so the current request could be executed.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10078
select {
case ui.concurrencyLimitCh <- struct{}{}:
return nil
case <-ctx.Done():
err := ctx.Err()
if errors.Is(err, context.DeadlineExceeded) {
// The current request couldn't be executed until the request timeout.
ui.concurrencyLimitReached.Inc()
return fmt.Errorf("cannot start executing the request during -maxQueueDuration=%s because %d concurrent requests from the user %s are executed",
*maxQueueDuration, ui.getMaxConcurrentRequests(), ui.name())
}
return fmt.Errorf("cannot start executing the request because %d concurrent requests from the user %s are executed: %w",
ui.getMaxConcurrentRequests(), ui.name(), err)
}
ui.concurrencyLimitReached.Inc()
return fmt.Errorf("cannot handle more than %d concurrent requests from user %s", ui.getMaxConcurrentRequests(), ui.name())
}
}
@@ -147,28 +127,6 @@ func (ui *UserInfo) getMaxConcurrentRequests() int {
return mcr
}
func (ui *UserInfo) stopHealthChecks() {
if ui == nil {
return
}
if ui.URLPrefix != nil {
bus := ui.URLPrefix.bus.Load()
bus.stopHealthChecks()
}
if ui.DefaultURL != nil {
bus := ui.DefaultURL.bus.Load()
bus.stopHealthChecks()
}
for i := range ui.URLMaps {
um := &ui.URLMaps[i]
if um.URLPrefix != nil {
bus := um.URLPrefix.bus.Load()
bus.stopHealthChecks()
}
}
}
// Header is `Name: Value` http header, which must be added to the proxied request.
type Header struct {
Name string
@@ -304,7 +262,7 @@ type URLPrefix struct {
// the list of backend urls
//
// the list can be dynamically updated if `discover_backend_ips` option is set.
bus atomic.Pointer[backendURLs]
bus atomic.Pointer[[]*backendURL]
// if this option is set, then backend ips for busOriginal are periodically re-discovered and put to bus.
discoverBackendIPs bool
@@ -328,91 +286,21 @@ func (up *URLPrefix) setLoadBalancingPolicy(loadBalancingPolicy string) error {
}
}
type backendURLs struct {
healthChecksContext context.Context
healthChecksCancel func()
healthChecksWG sync.WaitGroup
bus []*backendURL
}
func newBackendURLs() *backendURLs {
ctx, cancel := context.WithCancel(context.Background())
return &backendURLs{
healthChecksContext: ctx,
healthChecksCancel: cancel,
}
}
func (bus *backendURLs) add(u *url.URL) {
bus.bus = append(bus.bus, &backendURL{
url: u,
healthCheckContext: bus.healthChecksContext,
healthCheckWG: &bus.healthChecksWG,
})
}
func (bus *backendURLs) stopHealthChecks() {
bus.healthChecksCancel()
bus.healthChecksWG.Wait()
}
type backendURL struct {
broken atomic.Bool
healthCheckContext context.Context
healthCheckWG *sync.WaitGroup
brokenDeadline atomic.Uint64
concurrentRequests atomic.Int32
url *url.URL
}
func (bu *backendURL) isBroken() bool {
return bu.broken.Load()
ct := fasttime.UnixTimestamp()
return ct < bu.brokenDeadline.Load()
}
func (bu *backendURL) setBroken() {
if bu.broken.CompareAndSwap(false, true) {
bu.healthCheckWG.Go(func() {
bu.runHealthCheck()
bu.broken.Store(false)
})
}
}
func (bu *backendURL) runHealthCheck() {
port := bu.url.Port()
if port == "" {
port = "80"
}
addr := net.JoinHostPort(bu.url.Hostname(), port)
t := time.NewTicker(*failTimeout)
defer t.Stop()
for {
select {
case <-t.C:
// Verify network connectivity via TCP dial before marking backend healthy.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9997
ctx, cancel := context.WithTimeout(bu.healthCheckContext, time.Second)
c, err := netutil.Dialer.DialContext(ctx, "tcp", addr)
cancel()
if err != nil {
if errors.Is(bu.healthCheckContext.Err(), context.Canceled) {
return
}
logger.Warnf("ignoring the backend at %s for %s because of dial error: %s", addr, *failTimeout, err)
continue
}
_ = c.Close()
return
case <-bu.healthCheckContext.Done():
return
}
}
deadline := fasttime.UnixTimestamp() + uint64((*failTimeout).Seconds())
bu.brokenDeadline.Store(deadline)
}
func (bu *backendURL) get() {
@@ -424,8 +312,8 @@ func (bu *backendURL) put() {
}
func (up *URLPrefix) getBackendsCount() int {
bus := up.bus.Load()
return len(bus.bus)
pbus := up.bus.Load()
return len(*pbus)
}
// getBackendURL returns the backendURL depending on the load balance policy.
@@ -436,15 +324,16 @@ func (up *URLPrefix) getBackendsCount() int {
func (up *URLPrefix) getBackendURL() *backendURL {
up.discoverBackendAddrsIfNeeded()
bus := up.bus.Load()
if len(bus.bus) == 0 {
pbus := up.bus.Load()
bus := *pbus
if len(bus) == 0 {
return nil
}
if up.loadBalancingPolicy == "first_available" {
return getFirstAvailableBackendURL(bus.bus)
return getFirstAvailableBackendURL(bus)
}
return getLeastLoadedBackendURL(bus.bus, &up.n)
return getLeastLoadedBackendURL(bus, &up.n)
}
func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
@@ -518,24 +407,25 @@ func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
cancel()
// generate new backendURLs for the resolved IPs
busNew := newBackendURLs()
var busNew []*backendURL
for _, bu := range up.busOriginal {
host := bu.Hostname()
for _, addr := range hostToAddrs[host] {
buCopy := *bu
buCopy.Host = addr
busNew.add(&buCopy)
busNew = append(busNew, &backendURL{
url: &buCopy,
})
}
}
bus := up.bus.Load()
if areEqualBackendURLs(bus.bus, busNew.bus) {
pbus := up.bus.Load()
if areEqualBackendURLs(*pbus, busNew) {
return
}
// Store new backend urls
up.bus.Store(busNew)
bus.stopHealthChecks()
up.bus.Store(&busNew)
}
func areEqualBackendURLs(a, b []*backendURL) bool {
@@ -566,30 +456,27 @@ func getFirstAvailableBackendURL(bus []*backendURL) *backendURL {
for i := 1; i < len(bus); i++ {
if !bus[i].isBroken() {
bu = bus[i]
bu.get()
return bu
break
}
}
return nil
bu.get()
return bu
}
// getLeastLoadedBackendURL returns a non-broken backendURL with the lowest number of concurrent requests.
// getLeastLoadedBackendURL returns the backendURL with the minimum number of concurrent requests.
//
// backendURL.put() must be called on the returned backendURL after the request is complete.
func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *backendURL {
if len(bus) == 1 {
// Fast path - return the only backend url.
bu := bus[0]
if bu.isBroken() {
return nil
}
bu.get()
return bu
}
// Slow path - select other backend urls.
n := atomicCounter.Add(1) - 1
for i := range uint32(len(bus)) {
for i := uint32(0); i < uint32(len(bus)); i++ {
idx := (n + i) % uint32(len(bus))
bu := bus[idx]
if bu.isBroken() {
@@ -607,7 +494,7 @@ func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *
// Slow path - return the backend with the minimum number of concurrently executed requests.
buMinIdx := n % uint32(len(bus))
minRequests := bus[buMinIdx].concurrentRequests.Load()
for i := uint32(1); i < uint32(len(bus)); i++ {
for i := uint32(0); i < uint32(len(bus)); i++ {
idx := (n + i) % uint32(len(bus))
bu := bus[idx]
if bu.isBroken() {
@@ -621,9 +508,6 @@ func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *
}
}
buMin := bus[buMinIdx]
if buMin.isBroken() {
return nil
}
buMin.get()
atomicCounter.CompareAndSwap(n+1, buMinIdx+1)
return buMin
@@ -742,9 +626,11 @@ func initAuthConfig() {
configTimestamp.Set(fasttime.UnixTimestamp())
stopCh = make(chan struct{})
authConfigWG.Go(func() {
authConfigWG.Add(1)
go func() {
defer authConfigWG.Done()
authConfigReloader(sighupCh)
})
}()
}
func stopAuthConfig() {
@@ -800,9 +686,6 @@ var (
// authUsers contains the currently loaded auth users
authUsers atomic.Pointer[map[string]*UserInfo]
// jwt authentication cache
jwtAuthCache atomic.Pointer[jwtCache]
authConfigWG sync.WaitGroup
stopCh chan struct{}
)
@@ -819,7 +702,7 @@ func reloadAuthConfig() (bool, error) {
ok, err := reloadAuthConfigData(data)
if err != nil {
return false, fmt.Errorf("failed to parse -auth.config=%q: %w", *authConfigPath, err)
return false, fmt.Errorf("failed to pars -auth.config=%q: %w", *authConfigPath, err)
}
if !ok {
return false, nil
@@ -842,14 +725,6 @@ func reloadAuthConfigData(data []byte) (bool, error) {
return false, fmt.Errorf("failed to parse auth config: %w", err)
}
jui, err := parseJWTUsers(ac)
if err != nil {
return false, fmt.Errorf("failed to parse JWT users from auth config: %w", err)
}
jwtc := &jwtCache{
users: jui,
}
m, err := parseAuthConfigUsers(ac)
if err != nil {
return false, fmt.Errorf("failed to parse users from auth config: %w", err)
@@ -857,11 +732,6 @@ func reloadAuthConfigData(data []byte) (bool, error) {
acPrev := authConfig.Load()
if acPrev != nil {
acPrev.UnauthorizedUser.stopHealthChecks()
for i := range acPrev.Users {
acPrev.Users[i].stopHealthChecks()
}
metrics.UnregisterSet(acPrev.ms, true)
}
metrics.RegisterSet(ac.ms)
@@ -869,7 +739,6 @@ func reloadAuthConfigData(data []byte) (bool, error) {
authConfig.Store(ac)
authConfigData.Store(&data)
authUsers.Store(&m)
jwtAuthCache.Store(jwtc)
return true, nil
}
@@ -894,9 +763,6 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
if ui.BearerToken != "" {
return nil, fmt.Errorf("field bearer_token can't be specified for unauthorized_user section")
}
if ui.JWT != nil {
return nil, fmt.Errorf("field jwt can't be specified for unauthorized_user section")
}
if ui.AuthToken != "" {
return nil, fmt.Errorf("field auth_token can't be specified for unauthorized_user section")
}
@@ -912,8 +778,6 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
return nil, fmt.Errorf("cannot parse metric_labels for unauthorized_user: %w", err)
}
ui.requests = ac.ms.NewCounter(`vmauth_unauthorized_user_requests_total` + metricLabels)
ui.requestErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_errors_total` + metricLabels)
ui.backendRequests = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_requests_total` + metricLabels)
ui.backendErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_errors_total` + metricLabels)
ui.requestsDuration = ac.ms.NewSummary(`vmauth_unauthorized_user_request_duration_seconds` + metricLabels)
ui.concurrencyLimitCh = make(chan struct{}, ui.getMaxConcurrentRequests())
@@ -943,17 +807,10 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
}
for i := range uis {
ui := &uis[i]
// users with jwt tokens are parsed by parseJWTUsers function.
// the function also checks that users with jwt tokens do not have auth tokens, bearer tokens, usernames and passwords.
if ui.JWT != nil {
continue
}
ats, err := getAuthTokens(ui.AuthToken, ui.BearerToken, ui.Username, ui.Password)
if err != nil {
return nil, err
}
for _, at := range ats {
if uiOld := byAuthToken[at]; uiOld != nil {
return nil, fmt.Errorf("duplicate auth token=%q found for username=%q, name=%q; the previous one is set for username=%q, name=%q",
@@ -969,8 +826,6 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
return nil, fmt.Errorf("cannot parse metric_labels: %w", err)
}
ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
ui.requestErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_errors_total` + metricLabels)
ui.backendRequests = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_requests_total` + metricLabels)
ui.backendErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels)
ui.requestsDuration = ac.ms.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels)
mcr := ui.getMaxConcurrentRequests()
@@ -1205,11 +1060,13 @@ func (up *URLPrefix) sanitizeAndInitialize() error {
}
// Initialize up.bus
bus := newBackendURLs()
for _, bu := range up.busOriginal {
bus.add(bu)
bus := make([]*backendURL, len(up.busOriginal))
for i, bu := range up.busOriginal {
bus[i] = &backendURL{
url: bu,
}
}
up.bus.Store(bus)
up.bus.Store(&bus)
return nil
}

View File

@@ -378,7 +378,7 @@ users:
RetryStatusCodes: []int{500, 501},
LoadBalancingPolicy: "first_available",
MergeQueryArgs: []string{"foo", "bar"},
DropSrcPathPrefixParts: new(1),
DropSrcPathPrefixParts: intp(1),
DiscoverBackendIPs: &discoverBackendIPsTrue,
},
}, nil)
@@ -621,22 +621,6 @@ unauthorized_user:
},
},
})
// skip user info with jwt, it is parsed by parseJWTUsers
f(`
users:
- username: foo
password: bar
url_prefix: http://aaa:343/bbb
- jwt: {skip_verify: true}
url_prefix: http://aaa:343/bbb
`, map[string]*UserInfo{
getHTTPAuthBasicToken("foo", "bar"): {
Username: "foo",
Password: "bar",
URLPrefix: mustParseURL("http://aaa:343/bbb"),
},
}, nil)
}
func TestParseAuthConfigPassesTLSVerificationConfig(t *testing.T) {
@@ -769,7 +753,7 @@ func TestGetLeastLoadedBackendURL(t *testing.T) {
up.loadBalancingPolicy = "least_loaded"
pbus := up.bus.Load()
bus := pbus.bus
bus := *pbus
fn := func(ns ...int) {
t.Helper()
@@ -841,13 +825,13 @@ func TestBrokenBackend(t *testing.T) {
})
up.loadBalancingPolicy = "least_loaded"
pbus := up.bus.Load()
bus := pbus.bus
bus := *pbus
// explicitly mark one of the backends as broken
bus[1].setBroken()
// broken backend should never return while there are healthy backends
for range int(1e3) {
for i := 0; i < 1e3; i++ {
b := up.getBackendURL()
if b.isBroken() {
t.Fatalf("unexpected broken backend %q", b.url)
@@ -864,7 +848,7 @@ func TestDiscoverBackendIPsWithIPV6(t *testing.T) {
up.discoverBackendAddrsIfNeeded()
pbus := up.bus.Load()
bus := pbus.bus
bus := *pbus
if len(bus) != 1 {
t.Fatalf("expected url list to be of size 1; got %d instead", len(bus))
@@ -958,14 +942,16 @@ func mustParseURL(u string) *URLPrefix {
}
func mustParseURLs(us []string) *URLPrefix {
bus := newBackendURLs()
bus := make([]*backendURL, len(us))
urls := make([]*url.URL, len(us))
for i, u := range us {
pu, err := url.Parse(u)
if err != nil {
panic(fmt.Errorf("BUG: cannot parse %q: %w", u, err))
}
bus.add(pu)
bus[i] = &backendURL{
url: pu,
}
urls[i] = pu
}
up := &URLPrefix{}
@@ -974,11 +960,15 @@ func mustParseURLs(us []string) *URLPrefix {
} else {
up.vOriginal = us
}
up.bus.Store(bus)
up.bus.Store(&bus)
up.busOriginal = urls
return up
}
func intp(n int) *int {
return &n
}
func mustNewRegex(s string) *Regex {
var re Regex
if err := yaml.Unmarshal([]byte(s), &re); err != nil {

View File

@@ -1,156 +0,0 @@
package main
import (
"fmt"
"os"
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/jwt"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
type jwtCache struct {
// users contain UserInfo`s from AuthConfig with JWTConfig set
users []*UserInfo
}
type JWTConfig struct {
PublicKeys []string `yaml:"public_keys,omitempty"`
PublicKeyFiles []string `yaml:"public_key_files,omitempty"`
SkipVerify bool `yaml:"skip_verify,omitempty"`
verifierPool *jwt.VerifierPool
}
func parseJWTUsers(ac *AuthConfig) ([]*UserInfo, error) {
jui := make([]*UserInfo, 0, len(ac.Users))
for _, ui := range ac.Users {
jwtToken := ui.JWT
if jwtToken == nil {
continue
}
if ui.AuthToken != "" || ui.BearerToken != "" || ui.Username != "" || ui.Password != "" {
return nil, fmt.Errorf("auth_token, bearer_token, username and password cannot be specified if jwt is set")
}
if len(jwtToken.PublicKeys) == 0 && len(jwtToken.PublicKeyFiles) == 0 && !jwtToken.SkipVerify {
return nil, fmt.Errorf("jwt must contain at least a single public key, public_key_files or have skip_verify=true")
}
if len(jwtToken.PublicKeys) > 0 || len(jwtToken.PublicKeyFiles) > 0 {
keys := make([]any, 0, len(jwtToken.PublicKeys)+len(jwtToken.PublicKeyFiles))
for i := range jwtToken.PublicKeys {
k, err := jwt.ParseKey([]byte(jwtToken.PublicKeys[i]))
if err != nil {
return nil, err
}
keys = append(keys, k)
}
for _, filePath := range jwtToken.PublicKeyFiles {
keyData, err := os.ReadFile(filePath)
if err != nil {
return nil, fmt.Errorf("cannot read public key from file %q: %w", filePath, err)
}
k, err := jwt.ParseKey(keyData)
if err != nil {
return nil, fmt.Errorf("cannot parse public key from file %q: %w", filePath, err)
}
keys = append(keys, k)
}
vp, err := jwt.NewVerifierPool(keys)
if err != nil {
return nil, err
}
jwtToken.verifierPool = vp
}
if err := ui.initURLs(); err != nil {
return nil, err
}
metricLabels, err := ui.getMetricLabels()
if err != nil {
return nil, fmt.Errorf("cannot parse metric_labels: %w", err)
}
ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
ui.requestErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_errors_total` + metricLabels)
ui.backendRequests = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_requests_total` + metricLabels)
ui.backendErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels)
ui.requestsDuration = ac.ms.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels)
mcr := ui.getMaxConcurrentRequests()
ui.concurrencyLimitCh = make(chan struct{}, mcr)
ui.concurrencyLimitReached = ac.ms.GetOrCreateCounter(`vmauth_user_concurrent_requests_limit_reached_total` + metricLabels)
_ = ac.ms.GetOrCreateGauge(`vmauth_user_concurrent_requests_capacity`+metricLabels, func() float64 {
return float64(cap(ui.concurrencyLimitCh))
})
_ = ac.ms.GetOrCreateGauge(`vmauth_user_concurrent_requests_current`+metricLabels, func() float64 {
return float64(len(ui.concurrencyLimitCh))
})
rt, err := newRoundTripper(ui.TLSCAFile, ui.TLSCertFile, ui.TLSKeyFile, ui.TLSServerName, ui.TLSInsecureSkipVerify)
if err != nil {
return nil, fmt.Errorf("cannot initialize HTTP RoundTripper: %w", err)
}
ui.rt = rt
jui = append(jui, &ui)
}
// the limitation will be lifted once claim based matching will be implemented
if len(jui) > 1 {
return nil, fmt.Errorf("multiple users with JWT tokens are not supported; found %d users", len(jui))
}
return jui, nil
}
func getUserInfoByJWTToken(ats []string) *UserInfo {
js := *jwtAuthCache.Load()
if len(js.users) == 0 {
return nil
}
for _, at := range ats {
if strings.Count(at, ".") != 2 {
continue
}
at, _ = strings.CutPrefix(at, `http_auth:`)
tkn, err := jwt.NewToken(at, true)
if err != nil {
if *logInvalidAuthTokens {
logger.Infof("cannot parse jwt token: %s", err)
}
continue
}
if tkn.IsExpired(time.Now()) {
if *logInvalidAuthTokens {
logger.Infof("jwt token is expired")
}
continue
}
for _, ui := range js.users {
if ui.JWT.SkipVerify {
return ui
}
if err := ui.JWT.verifierPool.Verify(tkn); err != nil {
if *logInvalidAuthTokens {
logger.Infof("cannot verify jwt token: %s", err)
}
continue
}
return ui
}
}
return nil
}

View File

@@ -1,304 +0,0 @@
package main
import (
"fmt"
"os"
"path/filepath"
"testing"
)
func TestJWTParseAuthConfigFailure(t *testing.T) {
validRSAPublicKey := `-----BEGIN PUBLIC KEY-----
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAiX7oPWKOWRQsGFEWvwZO
mL2PYsdYUsu9nr0qtPCjxQHUJgLfT3rdKlvKpPFYv7ZmKnqTncg36Wz9uiYmWJ7e
IB5Z+fko8kVIMzarCqVvpAJDzYF/pUii68xvuYoK3L9TIOAeyCXv+prwnr2IH+Mw
9AONzWbRrYoO74XyTE9vMU5qmI/L1VPk+PR8lqPOSptLvzsfoaIk2ED4yK2nRB+6
st+k4nccPqbErqHc8aiXnXfugfnr6b+NPFYUzKsDqkymGOokVijrI8B3jNw6c6Do
zphk+D3wgLsXYHfMcZbXIMqffqm/aB8Qg88OpFOkQ3rd2p6R9+hacnZkfkn3Phiw
yQIDAQAB
-----END PUBLIC KEY-----
`
// ECDSA with the P-521 curve
validECDSAPublicKey := `-----BEGIN PUBLIC KEY-----
MIGbMBAGByqGSM49AgEGBSuBBAAjA4GGAAQAU9RmtkCRuYTKCyvLlDn5DtBZOHSe
QTa5j9q/oQVpCKqcXVFrH5dgh0GL+P/ZhkeuowPzCZqntGf0+7wPt9OxSJcADVJm
dv92m540MXss8zdHf5qtE0gsu2Ved0R7Z8a8QwGZ/1mYZ+kFGGbdQTlSvRqDySTq
XOtclIk1uhc03oL9nOQ=
-----END PUBLIC KEY-----
`
f := func(s string, expErr string) {
t.Helper()
ac, err := parseAuthConfig([]byte(s))
if err != nil {
if expErr != err.Error() {
t.Fatalf("unexpected error; got %q; want %q", err.Error(), expErr)
}
return
}
users, err := parseJWTUsers(ac)
if err != nil {
if expErr != err.Error() {
t.Fatalf("unexpected error; got %q; want %q", err.Error(), expErr)
}
return
}
t.Fatalf("expecting non-nil error; got %v", users)
}
// unauthorized_user cannot be used with jwt
f(`
unauthorized_user:
jwt: {skip_verify: true}
url_prefix: http://foo.bar
`, `field jwt can't be specified for unauthorized_user section`)
// username and jwt in a single config
f(`
users:
- username: foo
jwt: {skip_verify: true}
url_prefix: http://foo.bar
`, `auth_token, bearer_token, username and password cannot be specified if jwt is set`)
// bearer_token and jwt in a single config
f(`
users:
- bearer_token: foo
jwt: {skip_verify: true}
url_prefix: http://foo.bar
`, `auth_token, bearer_token, username and password cannot be specified if jwt is set`)
// bearer_token and jwt in a single config
f(`
users:
- auth_token: "Foo token"
jwt: {skip_verify: true}
url_prefix: http://foo.bar
`, `auth_token, bearer_token, username and password cannot be specified if jwt is set`)
// jwt public_keys or skip_verify must be set, part 1
f(`
users:
- jwt: {}
url_prefix: http://foo.bar
`, `jwt must contain at least a single public key, public_key_files or have skip_verify=true`)
// jwt public_keys or skip_verify must be set, part 2
f(`
users:
- jwt: {public_keys: null}
url_prefix: http://foo.bar
`, `jwt must contain at least a single public key, public_key_files or have skip_verify=true`)
// jwt public_keys or skip_verify must be set, part 3
f(`
users:
- jwt: {public_keys: []}
url_prefix: http://foo.bar
`, `jwt must contain at least a single public key, public_key_files or have skip_verify=true`)
// jwt public_keys, public_key_files or skip_verify must be set
f(`
users:
- jwt: {public_key_files: []}
url_prefix: http://foo.bar
`, `jwt must contain at least a single public key, public_key_files or have skip_verify=true`)
// invalid public key, part 1
f(`
users:
- jwt: {public_keys: [""]}
url_prefix: http://foo.bar
`, `failed to parse key "": failed to decode PEM block containing public key`)
// invalid public key, part 2
f(`
users:
- jwt: {public_keys: ["invalid"]}
url_prefix: http://foo.bar
`, `failed to parse key "invalid": failed to decode PEM block containing public key`)
// invalid public key, part 2
f(fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
- %q
- "invalid"
url_prefix: http://foo.bar
`, validRSAPublicKey, validECDSAPublicKey), `failed to parse key "invalid": failed to decode PEM block containing public key`)
// several jwt users
// invalid public key, part 2
f(fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
url_prefix: http://foo.bar
- jwt:
public_keys:
- %q
url_prefix: http://foo.bar
`, validRSAPublicKey, validECDSAPublicKey), `multiple users with JWT tokens are not supported; found 2 users`)
// public key file doesn't exist
f(`
users:
- jwt:
public_key_files:
- /path/to/nonexistent/file.pem
url_prefix: http://foo.bar
`, "cannot read public key from file \"/path/to/nonexistent/file.pem\": open /path/to/nonexistent/file.pem: no such file or directory")
// public key file invalid
// auth with key from file
publicKeyFile := filepath.Join(t.TempDir(), "a_public_key.pem")
if err := os.WriteFile(publicKeyFile, []byte(`invalidPEM`), 0o644); err != nil {
t.Fatalf("failed to write public key file: %s", err)
}
f(`
users:
- jwt:
public_key_files:
- `+publicKeyFile+`
url_prefix: http://foo.bar
`, "cannot parse public key from file \""+publicKeyFile+"\": failed to parse key \"invalidPEM\": failed to decode PEM block containing public key")
}
func TestJWTParseAuthConfigSuccess(t *testing.T) {
validRSAPublicKey := `-----BEGIN PUBLIC KEY-----
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAiX7oPWKOWRQsGFEWvwZO
mL2PYsdYUsu9nr0qtPCjxQHUJgLfT3rdKlvKpPFYv7ZmKnqTncg36Wz9uiYmWJ7e
IB5Z+fko8kVIMzarCqVvpAJDzYF/pUii68xvuYoK3L9TIOAeyCXv+prwnr2IH+Mw
9AONzWbRrYoO74XyTE9vMU5qmI/L1VPk+PR8lqPOSptLvzsfoaIk2ED4yK2nRB+6
st+k4nccPqbErqHc8aiXnXfugfnr6b+NPFYUzKsDqkymGOokVijrI8B3jNw6c6Do
zphk+D3wgLsXYHfMcZbXIMqffqm/aB8Qg88OpFOkQ3rd2p6R9+hacnZkfkn3Phiw
yQIDAQAB
-----END PUBLIC KEY-----
`
// ECDSA with the P-521 curve
validECDSAPublicKey := `-----BEGIN PUBLIC KEY-----
MIGbMBAGByqGSM49AgEGBSuBBAAjA4GGAAQAU9RmtkCRuYTKCyvLlDn5DtBZOHSe
QTa5j9q/oQVpCKqcXVFrH5dgh0GL+P/ZhkeuowPzCZqntGf0+7wPt9OxSJcADVJm
dv92m540MXss8zdHf5qtE0gsu2Ved0R7Z8a8QwGZ/1mYZ+kFGGbdQTlSvRqDySTq
XOtclIk1uhc03oL9nOQ=
-----END PUBLIC KEY-----
`
f := func(s string) {
t.Helper()
ac, err := parseAuthConfig([]byte(s))
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
jui, err := parseJWTUsers(ac)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
for _, ui := range jui {
if ui.JWT == nil {
t.Fatalf("unexpected nil JWTConfig")
}
if ui.JWT.SkipVerify {
if ui.JWT.verifierPool != nil {
t.Fatalf("unexpected non-nil verifier pool for skip_verify=true")
}
continue
}
if ui.JWT.verifierPool == nil {
t.Fatalf("unexpected nil verifier pool for non-empty public keys")
}
}
}
f(fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
url_prefix: http://foo.bar
`, validRSAPublicKey))
f(fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
url_prefix: http://foo.bar
`, validECDSAPublicKey))
f(fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
- %q
url_prefix: http://foo.bar
`, validRSAPublicKey, validECDSAPublicKey))
f(`
users:
- jwt:
skip_verify: true
url_prefix: http://foo.bar
`)
// combined with other auth methods
f(`
users:
- username: foo
password: bar
url_prefix: http://foo.bar
- jwt:
skip_verify: true
url_prefix: http://foo.bar
- bearer_token: foo
url_prefix: http://foo.bar
`)
rsaKeyFile := filepath.Join(t.TempDir(), "rsa_public_key.pem")
if err := os.WriteFile(rsaKeyFile, []byte(validRSAPublicKey), 0o644); err != nil {
t.Fatalf("failed to write RSA key file: %s", err)
}
ecdsaKeyFile := filepath.Join(t.TempDir(), "ecdsa_public_key.pem")
if err := os.WriteFile(ecdsaKeyFile, []byte(validECDSAPublicKey), 0o644); err != nil {
t.Fatalf("failed to write ECDSA key file: %s", err)
}
// Test single public key file
f(fmt.Sprintf(`
users:
- jwt:
public_key_files:
- %q
url_prefix: http://foo.bar
`, rsaKeyFile))
// Test multiple public key files
f(fmt.Sprintf(`
users:
- jwt:
public_key_files:
- %q
- %q
url_prefix: http://foo.bar
`, rsaKeyFile, ecdsaKeyFile))
// Test combined inline keys and files
f(fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
public_key_files:
- %q
url_prefix: http://foo.bar
`, validECDSAPublicKey, rsaKeyFile))
}

View File

@@ -24,7 +24,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ioutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
@@ -41,38 +40,22 @@ var (
useProxyProtocol = flagutil.NewArrayBool("httpListenAddr.useProxyProtocol", "Whether to use proxy protocol for connections accepted at the corresponding -httpListenAddr . "+
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
maxIdleConnsPerBackend = flag.Int("maxIdleConnsPerBackend", 100, "The maximum number of idle connections vmauth can open per each backend host")
idleConnTimeout = flag.Duration("idleConnTimeout", 50*time.Second, "The timeout for HTTP keep-alive connections to backend services. "+
maxIdleConnsPerBackend = flag.Int("maxIdleConnsPerBackend", 100, "The maximum number of idle connections vmauth can open per each backend host. "+
"See also -maxConcurrentRequests")
idleConnTimeout = flag.Duration("idleConnTimeout", 50*time.Second, "The timeout for HTTP keep-alive connections to backend services. "+
"It is recommended setting this value to values smaller than -http.idleConnTimeout set at backend services")
responseTimeout = flag.Duration("responseTimeout", 5*time.Minute, "The timeout for receiving a response from backend")
requestBufferSize = flagutil.NewBytes("requestBufferSize", 32*1024, "The size of the buffer for reading the request body before proxying the request to backends. "+
"This allows reducing the comsumption of backend resources when processing requests from clients connected via slow networks. "+
"Set to 0 to disable request buffering. See https://docs.victoriametrics.com/victoriametrics/vmauth/#request-body-buffering")
maxRequestBodySizeToRetry = flagutil.NewBytes("maxRequestBodySizeToRetry", 16*1024, "The maximum request body size to buffer in memory for potential retries at other backends. "+
"Request bodies larger than this size cannot be retried if the backend fails. Zero or negative value disables request body buffering and retries. "+
"See also -requestBufferSize")
maxConcurrentRequests = flag.Int("maxConcurrentRequests", 1000, "The maximum number of concurrent requests vmauth can process simultaneously. "+
"Requests exceeding this limit are queued for up to -maxQueueDuration and then rejected with '429 Too Many Requests' http status code if the limit is still reached. "+
"This protects vmauth itself from overloading and out-of-memory (OOM) failures. See also -maxConcurrentPerUserRequests "+
"and https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting")
maxConcurrentPerUserRequests = flag.Int("maxConcurrentPerUserRequests", 100, "The maximum number of concurrent requests vmauth can process per each configured user. "+
"Requests exceeding this limit are queued for up to -maxQueueDuration and then rejected with '429 Too Many Requests' http status code if the limit is still reached. "+
"This provides fairness and isolation between users, preventing a single user from consuming all the available resources. "+
"It works in conjunction with -maxConcurrentRequests, which sets the global limit across all users. "+
"This default can be overridden for individual users via max_concurrent_requests option in per-user config. "+
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting")
maxQueueDuration = flag.Duration("maxQueueDuration", 10*time.Second, "The maximum duration to wait before rejecting incoming requests if concurrency limit "+
"specified via -maxConcurrentRequests or -maxConcurrentPerUserRequests command-line flags is reached. "+
"Requests are rejected with '429 Too Many Requests' http status code if the limit is still reached after the -maxQueueDuration duration. "+
"This allows graceful handling of short spikes in concurrent requests. See https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting")
responseTimeout = flag.Duration("responseTimeout", 5*time.Minute, "The timeout for receiving a response from backend")
maxConcurrentRequests = flag.Int("maxConcurrentRequests", 1000, "The maximum number of concurrent requests vmauth can process. Other requests are rejected with "+
"'429 Too Many Requests' http status code. See also -maxConcurrentPerUserRequests and -maxIdleConnsPerBackend command-line options")
maxConcurrentPerUserRequests = flag.Int("maxConcurrentPerUserRequests", 300, "The maximum number of concurrent requests vmauth can process per each configured user. "+
"Other requests are rejected with '429 Too Many Requests' http status code. See also -maxConcurrentRequests command-line option and max_concurrent_requests option "+
"in per-user config")
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
logInvalidAuthTokens = flag.Bool("logInvalidAuthTokens", false, "Whether to log requests with invalid auth tokens. "+
`Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page`)
failTimeout = flag.Duration("failTimeout", 3*time.Second, "Sets a delay period for load balancing to skip a malfunctioning backend")
failTimeout = flag.Duration("failTimeout", 3*time.Second, "Sets a delay period for load balancing to skip a malfunctioning backend")
maxRequestBodySizeToRetry = flagutil.NewBytes("maxRequestBodySizeToRetry", 16*1024, "The maximum request body size, which can be cached and re-tried at other backends. "+
"Bigger values may require more memory. Zero or negative value disables caching of request body. This may be useful when proxying data ingestion requests")
backendTLSInsecureSkipVerify = flag.Bool("backend.tlsInsecureSkipVerify", false, "Whether to skip TLS verification when connecting to backends over HTTPS. "+
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#backend-tls-setup")
backendTLSCAFile = flag.String("backend.TLSCAFile", "", "Optional path to TLS root CA file, which is used for TLS verification when connecting to backends over HTTPS. "+
@@ -168,6 +151,7 @@ func requestHandlerWithInternalRoutes(w http.ResponseWriter, r *http.Request) bo
}
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
ats := getAuthTokensFromRequest(r)
if len(ats) == 0 {
// Process requests for unauthorized users
@@ -181,32 +165,29 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
return true
}
if ui := getUserInfoByAuthTokens(ats); ui != nil {
processUserRequest(w, r, ui)
return true
}
if ui := getUserInfoByJWTToken(ats); ui != nil {
processUserRequest(w, r, ui)
return true
}
uu := authConfig.Load().UnauthorizedUser
if uu != nil {
processUserRequest(w, r, uu)
return true
}
invalidAuthTokenRequests.Inc()
if *logInvalidAuthTokens {
err := fmt.Errorf("cannot authorize request with auth tokens %q", ats)
err = &httpserver.ErrorWithStatusCode{
Err: err,
StatusCode: http.StatusUnauthorized,
ui := getUserInfoByAuthTokens(ats)
if ui == nil {
uu := authConfig.Load().UnauthorizedUser
if uu != nil {
processUserRequest(w, r, uu)
return true
}
httpserver.Errorf(w, r, "%s", err)
} else {
http.Error(w, "Unauthorized", http.StatusUnauthorized)
invalidAuthTokenRequests.Inc()
if *logInvalidAuthTokens {
err := fmt.Errorf("cannot authorize request with auth tokens %q", ats)
err = &httpserver.ErrorWithStatusCode{
Err: err,
StatusCode: http.StatusUnauthorized,
}
httpserver.Errorf(w, r, "%s", err)
} else {
http.Error(w, "Unauthorized", http.StatusUnauthorized)
}
return true
}
processUserRequest(w, r, ui)
return true
}
@@ -227,124 +208,26 @@ func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
ui.requests.Inc()
ctx, cancel := context.WithTimeout(r.Context(), *maxQueueDuration)
defer cancel()
// Acquire global concurrency limit.
if err := beginConcurrencyLimit(ctx); err != nil {
handleConcurrencyLimitError(w, r, err)
return
}
defer endConcurrencyLimit()
// Set read deadline for reading the initial chunk for the request body.
rc := http.NewResponseController(w)
deadline, ok := ctx.Deadline()
if !ok {
logger.Panicf("BUG: expecting valid deadline for the context")
}
if err := rc.SetReadDeadline(deadline); err != nil {
logger.Panicf("BUG: cannot set read deadline: %s", err)
}
// Read the initial chunk for the request body.
userName := ui.name()
if userName == "" {
userName = "unauthorized"
}
bb, err := bufferRequestBody(ctx, r.Body, userName)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
r.Body = bb
// Disable the read deadline for the rest of the request body.
if err := rc.SetReadDeadline(time.Time{}); err != nil {
logger.Panicf("BUG: cannot reset read deadline: %s", err)
}
// Acquire concurrency limit for the given user.
if err := ui.beginConcurrencyLimit(ctx); err != nil {
handleConcurrencyLimitError(w, r, err)
return
}
defer ui.endConcurrencyLimit()
// Process the request.
processRequest(w, r, ui)
}
func beginConcurrencyLimit(ctx context.Context) error {
// Limit the concurrency of requests to backends
concurrencyLimitOnce.Do(concurrencyLimitInit)
select {
case concurrencyLimitCh <- struct{}{}:
return nil
if err := ui.beginConcurrencyLimit(); err != nil {
handleConcurrencyLimitError(w, r, err)
<-concurrencyLimitCh
return
}
default:
// The -maxConcurrentRequests are executed. Wait until some of the requests are finished,
// so the current request could be executed.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10078
select {
case concurrencyLimitCh <- struct{}{}:
return nil
case <-ctx.Done():
err := ctx.Err()
if errors.Is(err, context.DeadlineExceeded) {
// The current request couldn't be executed until the request timeout.
concurrentRequestsLimitReached.Inc()
return fmt.Errorf("cannot start executing the request during -maxQueueDuration=%s because -maxConcurrentRequests=%d concurrent requests are executed",
*maxQueueDuration, cap(concurrencyLimitCh))
}
return fmt.Errorf("cannot start executing the request because -maxConcurrentRequests=%d concurrent requests are executed: %w", cap(concurrencyLimitCh), err)
}
concurrentRequestsLimitReached.Inc()
err := fmt.Errorf("cannot serve more than -maxConcurrentRequests=%d concurrent requests", cap(concurrencyLimitCh))
handleConcurrencyLimitError(w, r, err)
return
}
}
func endConcurrencyLimit() {
processRequest(w, r, ui)
ui.endConcurrencyLimit()
<-concurrencyLimitCh
}
func bufferRequestBody(ctx context.Context, r io.ReadCloser, userName string) (io.ReadCloser, error) {
if r == nil {
// This is a GET request with nil reader.
return nil, nil
}
maxBufSize := max(requestBufferSize.IntN(), maxRequestBodySizeToRetry.IntN())
if maxBufSize <= 0 {
return r, nil
}
lr := ioutil.GetLimitedReader(r, int64(maxBufSize))
defer ioutil.PutLimitedReader(lr)
start := time.Now()
buf, err := io.ReadAll(lr)
bufferRequestBodyDuration.UpdateDuration(start)
if err != nil {
if errors.Is(ctx.Err(), context.DeadlineExceeded) {
rejectSlowClientRequests.Inc()
d := time.Since(start)
return nil, &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("reject request from the user %s because the request body couldn't be read in -maxQueueDuration=%s; read %d bytes in %s",
userName, *maxQueueDuration, len(buf), d.Truncate(time.Second)),
StatusCode: http.StatusBadRequest,
}
}
return nil, &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("cannot read request body: %w", err),
StatusCode: http.StatusBadRequest,
}
}
bb := newBufferedBody(r, buf, maxBufSize)
return bb, nil
}
func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
u := normalizeURL(r.URL)
up, hc := ui.getURLPrefixAndHeaders(u, r.Host, r.Header)
@@ -370,26 +253,28 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
isDefault = true
}
rtb := newReadTrackingBody(r.Body, maxRequestBodySizeToRetry.IntN())
r.Body = rtb
maxAttempts := up.getBackendsCount()
for range maxAttempts {
for i := 0; i < maxAttempts; i++ {
bu := up.getBackendURL()
if bu == nil {
break
}
targetURL := bu.url
// Don't change path and add request_path query param for default route.
if isDefault {
// Don't change path and add request_path query param for default route.
query := targetURL.Query()
query.Set("request_path", u.String())
targetURL.RawQuery = query.Encode()
} else {
// Update path for regular routes.
} else { // Update path for regular routes.
targetURL = mergeURLs(targetURL, u, up.dropSrcPathPrefixParts, up.mergeQueryArgs)
}
wasLocalRetry := false
again:
ok, needLocalRetry := tryProcessingRequest(w, r, targetURL, hc, up.retryStatusCodes, ui, bu)
ok, needLocalRetry := tryProcessingRequest(w, r, targetURL, hc, up.retryStatusCodes, ui)
if needLocalRetry && !wasLocalRetry {
wasLocalRetry = true
goto again
@@ -399,20 +284,17 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
if ok {
return
}
bu.setBroken()
ui.backendErrors.Inc()
}
err := &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("all the %d backends for the user %q are unavailable for proxying the request - check previous WARN logs to see the exact error for each failed backend", up.getBackendsCount(), ui.name()),
Err: fmt.Errorf("all the %d backends for the user %q are unavailable", up.getBackendsCount(), ui.name()),
StatusCode: http.StatusBadGateway,
}
httpserver.Errorf(w, r, "%s", err)
ui.requestErrors.Inc()
ui.backendErrors.Inc()
}
func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int, ui *UserInfo, bu *backendURL) (bool, bool) {
ui.backendRequests.Inc()
func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int, ui *UserInfo) (bool, bool) {
req := sanitizeRequestHeaders(r)
req.URL = targetURL
@@ -426,19 +308,28 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
}
}
bb, bbOK := req.Body.(*bufferedBody)
canRetry := !bbOK || bb.canRetry()
rtb, rtbOK := req.Body.(*readTrackingBody)
res, err := ui.rt.RoundTrip(req)
if errors.Is(r.Context().Err(), context.Canceled) {
// Do not retry canceled requests.
clientCanceledRequests.Inc()
return true, false
if ctxErr := r.Context().Err(); ctxErr != nil {
// Override the error returned by the RoundTrip with the context error if it isn't non-nil
// This makes sure the proper logging for canceled and timed out requests - log the real cause of the error
// instead of the random error, which could be returned from RoundTrip because of canceled or timed out request.
err = ctxErr
}
if err != nil {
if !canRetry {
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
// Do not retry canceled or timed out requests
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
requestURI := httpserver.GetRequestURI(r)
if errors.Is(err, context.DeadlineExceeded) {
// Timed out request must be counted as errors, since this usually means that the backend is slow.
logger.Warnf("remoteAddr: %s; requestURI: %s; timeout while proxying the response from %s: %s", remoteAddr, requestURI, targetURL, err)
ui.backendErrors.Inc()
}
return false, false
}
if !rtbOK || !rtb.canRetry() {
// Request body cannot be re-sent to another backend. Return the error to the client then.
err = &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("cannot proxy the request to %s: %w", targetURL, err),
@@ -446,51 +337,41 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
}
httpserver.Errorf(w, r, "%s", err)
ui.backendErrors.Inc()
ui.requestErrors.Inc()
bu.setBroken()
return true, false
}
if netutil.IsTrivialNetworkError(err) {
// Retry request at the same backend on trivial network errors, such as proxy idle timeout misconfiguration or socket close by OS
if bbOK {
bb.resetReader()
}
return false, true
}
// Retry the request at another backend
// Retry the request if its body wasn't read yet. This usually means that the backend isn't reachable.
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
requestURI := httpserver.GetRequestURI(r)
logger.Warnf("remoteAddr: %s; requestURI: %s; request to %s failed: %s, retrying the request at another backend", remoteAddr, requestURI, targetURL, err)
if bbOK {
bb.resetReader()
}
// NOTE: do not use httpserver.GetRequestURI
// it explicitly reads request body, which may fail retries.
logger.Warnf("remoteAddr: %s; requestURI: %s; retrying the request to %s because of response error: %s", remoteAddr, req.URL, targetURL, err)
return false, false
}
if slices.Contains(retryStatusCodes, res.StatusCode) {
if !canRetry {
_ = res.Body.Close()
if !rtbOK || !rtb.canRetry() {
// If we get an error from the retry_status_codes list, but cannot execute retry,
// we consider such a request an error as well.
err := &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("got response status code=%d from %s, but cannot retry the request at another backend, because the request body has been already consumed",
Err: fmt.Errorf("got response status code=%d from %s, but cannot retry the request on another backend, because the request has been already consumed",
res.StatusCode, targetURL),
StatusCode: http.StatusServiceUnavailable,
}
httpserver.Errorf(w, r, "%s", err)
ui.backendErrors.Inc()
ui.requestErrors.Inc()
return true, false
}
// Retry requests at other backends if it matches retryStatusCodes.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4893
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
requestURI := httpserver.GetRequestURI(r)
logger.Warnf("remoteAddr: %s; requestURI: %s; request to %s failed, retrying the request at another backend because response status code=%d belongs to retry_status_codes=%d",
remoteAddr, requestURI, targetURL, res.StatusCode, retryStatusCodes)
if bbOK {
bb.resetReader()
}
// NOTE: do not use httpserver.GetRequestURI
// it explicitly reads request body, which may fail retries.
logger.Warnf("remoteAddr: %s; requestURI: %s; retrying the request to %s because response status code=%d belongs to retry_status_codes=%d",
remoteAddr, req.URL, targetURL, res.StatusCode, retryStatusCodes)
return false, false
}
removeHopHeaders(res.Header)
@@ -500,18 +381,11 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
err = copyStreamToClient(w, res.Body)
_ = res.Body.Close()
if errors.Is(r.Context().Err(), context.Canceled) {
// Do not retry canceled requests.
clientCanceledRequests.Inc()
return true, false
}
if err != nil && !netutil.IsTrivialNetworkError(err) {
if err != nil && !netutil.IsTrivialNetworkError(err) && !errors.Is(err, context.Canceled) {
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
requestURI := httpserver.GetRequestURI(r)
logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying response body from %s: %s", remoteAddr, requestURI, targetURL, err)
ui.requestErrors.Inc()
return true, false
}
return true, false
@@ -639,10 +513,6 @@ var (
configReloadRequests = metrics.NewCounter(`vmauth_http_requests_total{path="/-/reload"}`)
invalidAuthTokenRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="invalid_auth_token"}`)
missingRouteRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="missing_route"}`)
clientCanceledRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="client_canceled"}`)
rejectSlowClientRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="reject_slow_client"}`)
bufferRequestBodyDuration = metrics.NewSummary(`vmauth_buffer_request_body_duration_seconds`)
)
func newRoundTripper(caFileOpt, certFileOpt, keyFileOpt, serverNameOpt string, insecureSkipVerifyP *bool) (http.RoundTripper, error) {
@@ -726,13 +596,6 @@ func handleMissingAuthorizationError(w http.ResponseWriter) {
}
func handleConcurrencyLimitError(w http.ResponseWriter, r *http.Request, err error) {
if errors.Is(r.Context().Err(), context.Canceled) {
// Do not return any response for the request canceled by the client,
// since the connection to the client is already closed.
clientCanceledRequests.Inc()
return
}
w.Header().Add("Retry-After", "10")
err = &httpserver.ErrorWithStatusCode{
Err: err,
@@ -741,76 +604,120 @@ func handleConcurrencyLimitError(w http.ResponseWriter, r *http.Request, err err
httpserver.Errorf(w, r, "%s", err)
}
// bufferedBody serves two purposes:
// 1. Enables request retries when the body size does not exceed maxBodySize
// by fully buffering the body in memory.
// 2. Prevents slow clients from reducing effective server capacity by
// buffering the request body before acquiring a per-user concurrency slot.
//
// See bufferRequestBody for details on how bufferedBody is used.
type bufferedBody struct {
// r contains reader for reading the data after buf is read.
// readTrackingBody must be obtained via getReadTrackingBody()
type readTrackingBody struct {
// maxBodySize is the maximum body size to cache in buf.
//
// r is nil if buf contains all the data.
// Bigger bodies cannot be retried.
maxBodySize int
// r contains reader for initial data reading
r io.ReadCloser
// buf contains the initial buffer read from r.
// buf is a buffer for data read from r. Buf size is limited by maxBodySize.
// If more than maxBodySize is read from r, then cannotRetry is set to true.
buf []byte
// bufOffset is the offset at buf for already read bytes.
bufOffset int
// readBuf points to the cached data at buf, which must be read in the next call to Read().
readBuf []byte
// cannotRetry is set to true after Close() call on non-nil r.
// cannotRetry is set to true when more than maxBodySize bytes are read from r.
// In this case the read data cannot fit buf, so it cannot be re-read from buf.
cannotRetry bool
// bufComplete is set to true when buf contains complete request body read from r.
bufComplete bool
}
func newBufferedBody(r io.ReadCloser, buf []byte, maxBufSize int) *bufferedBody {
// Do not use sync.Pool here, since http.RoundTrip may still use request body after return.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8051
if len(buf) < maxBufSize {
// Read the full request body into buf.
r = nil
func newReadTrackingBody(r io.ReadCloser, maxBodySize int) *readTrackingBody {
// do not use sync.Pool there
// since http.RoundTrip may still use request body after return
// See this issue for details https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8051
rtb := &readTrackingBody{}
if maxBodySize < 0 {
maxBodySize = 0
}
rtb.maxBodySize = maxBodySize
return &bufferedBody{
r: r,
buf: buf,
if r == nil {
// This is GET request without request body
r = (*zeroReader)(nil)
}
rtb.r = r
return rtb
}
// Read implements io.Reader interface.
func (bb *bufferedBody) Read(p []byte) (int, error) {
if bb.cannotRetry {
return 0, fmt.Errorf("cannot read already closed body")
}
if bb.bufOffset < len(bb.buf) {
n := copy(p, bb.buf[bb.bufOffset:])
bb.bufOffset += n
return n, nil
}
if bb.r == nil {
return 0, io.EOF
}
return bb.r.Read(p)
}
type zeroReader struct{}
func (bb *bufferedBody) canRetry() bool {
return bb.r == nil
func (r *zeroReader) Read(_ []byte) (int, error) {
return 0, io.EOF
}
// Close implements io.Closer interface.
func (bb *bufferedBody) Close() error {
bb.resetReader()
if bb.r != nil {
bb.cannotRetry = true
return bb.r.Close()
}
func (r *zeroReader) Close() error {
return nil
}
func (bb *bufferedBody) resetReader() {
bb.bufOffset = 0
// Read implements io.Reader interface.
func (rtb *readTrackingBody) Read(p []byte) (int, error) {
if len(rtb.readBuf) > 0 {
n := copy(p, rtb.readBuf)
rtb.readBuf = rtb.readBuf[n:]
return n, nil
}
if rtb.r == nil {
if rtb.bufComplete {
return 0, io.EOF
}
return 0, fmt.Errorf("cannot read client request body after closing client reader")
}
n, err := rtb.r.Read(p)
if rtb.cannotRetry {
return n, err
}
if len(rtb.buf)+n > rtb.maxBodySize {
rtb.cannotRetry = true
return n, err
}
rtb.buf = append(rtb.buf, p[:n]...)
if err == io.EOF {
rtb.bufComplete = true
}
return n, err
}
func (rtb *readTrackingBody) canRetry() bool {
if rtb.cannotRetry {
return false
}
if rtb.bufComplete {
return true
}
return rtb.r != nil
}
// Close implements io.Closer interface.
func (rtb *readTrackingBody) Close() error {
if !rtb.cannotRetry {
rtb.readBuf = rtb.buf
} else {
rtb.readBuf = nil
}
// Close rtb.r only if the request body is completely read or if it is too big.
// http.Roundtrip performs body.Close call even without any Read calls,
// so this hack allows us to reuse request body.
if rtb.bufComplete || rtb.cannotRetry {
if rtb.r == nil {
return nil
}
err := rtb.r.Close()
rtb.r = nil
return err
}
return nil
}
func debugInfo(u *url.URL, r *http.Request) string {

View File

@@ -2,25 +2,14 @@ package main
import (
"bytes"
"context"
"crypto"
"crypto/rand"
"crypto/rsa"
"crypto/x509"
"encoding/base64"
"encoding/json"
"encoding/pem"
"fmt"
"io"
"net"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"sync/atomic"
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
)
@@ -429,7 +418,7 @@ unauthorized_user:
}
responseExpected = `
statusCode=502
all the 2 backends for the user "" are unavailable for proxying the request - check previous WARN logs to see the exact error for each failed backend`
all the 2 backends for the user "" are unavailable`
f(cfgStr, requestURL, backendHandler, responseExpected)
// all the backend_urls are unavailable for authorized user
@@ -447,7 +436,7 @@ users:
}
responseExpected = `
statusCode=502
all the 2 backends for the user "some-user" are unavailable for proxying the request - check previous WARN logs to see the exact error for each failed backend`
all the 2 backends for the user "some-user" are unavailable`
f(cfgStr, requestURL, backendHandler, responseExpected)
// zero discovered backend IPs
@@ -469,7 +458,7 @@ unauthorized_user:
}
responseExpected = `
statusCode=502
all the 0 backends for the user "" are unavailable for proxying the request - check previous WARN logs to see the exact error for each failed backend`
all the 0 backends for the user "" are unavailable`
f(cfgStr, requestURL, backendHandler, responseExpected)
netutil.Resolver = origResolver
@@ -486,7 +475,7 @@ unauthorized_user:
}
responseExpected = `
statusCode=502
all the 2 backends for the user "" are unavailable for proxying the request - check previous WARN logs to see the exact error for each failed backend`
all the 2 backends for the user "" are unavailable`
f(cfgStr, requestURL, backendHandler, responseExpected)
if n := retries.Load(); n != 2 {
t.Fatalf("unexpected number of retries; got %d; want 2", n)
@@ -515,218 +504,6 @@ requested_url={BACKEND}/path2/foo/?de=fg`
}
}
func TestJWTRequestHandler(t *testing.T) {
// Generate RSA key pair for testing
privateKey, err := rsa.GenerateKey(rand.Reader, 2048)
if err != nil {
t.Fatalf("cannot generate RSA key: %s", err)
}
// Generate public key PEM
publicKeyBytes, err := x509.MarshalPKIXPublicKey(&privateKey.PublicKey)
if err != nil {
t.Fatalf("cannot marshal public key: %s", err)
}
publicKeyPEM := pem.EncodeToMemory(&pem.Block{
Type: "PUBLIC KEY",
Bytes: publicKeyBytes,
})
genToken := func(t *testing.T, body map[string]any, valid bool) string {
t.Helper()
headerJSON, err := json.Marshal(map[string]any{
"alg": "RS256",
"typ": "JWT",
})
if err != nil {
t.Fatalf("cannot marshal header: %s", err)
}
headerB64 := base64.RawURLEncoding.EncodeToString(headerJSON)
bodyJSON, err := json.Marshal(body)
if err != nil {
t.Fatalf("cannot marshal body: %s", err)
}
bodyB64 := base64.RawURLEncoding.EncodeToString(bodyJSON)
payload := headerB64 + "." + bodyB64
var signatureB64 string
if valid {
// Create real RSA signature
hash := crypto.SHA256
h := hash.New()
h.Write([]byte(payload))
digest := h.Sum(nil)
signature, err := rsa.SignPKCS1v15(rand.Reader, privateKey, hash, digest)
if err != nil {
t.Fatalf("cannot sign token: %s", err)
}
signatureB64 = base64.RawURLEncoding.EncodeToString(signature)
} else {
signatureB64 = base64.RawURLEncoding.EncodeToString([]byte("invalid_signature"))
}
return payload + "." + signatureB64
}
genToken(t, nil, false)
f := func(cfgStr string, r *http.Request, responseExpected string) {
t.Helper()
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if _, err := w.Write([]byte(r.RequestURI + "\n")); err != nil {
panic(fmt.Errorf("cannot write response: %w", err))
}
if v := r.Header.Get(`extra_label`); v != "" {
if _, err := w.Write([]byte(`extra_label=` + v + "\n")); err != nil {
panic(fmt.Errorf("cannot write response: %w", err))
}
}
if v := r.Header.Get(`extra_filters`); v != "" {
if _, err := w.Write([]byte(`extra_filters=` + v + "\n")); err != nil {
panic(fmt.Errorf("cannot write response: %w", err))
}
}
}))
defer ts.Close()
cfgStr = strings.ReplaceAll(cfgStr, "{BACKEND}", ts.URL)
responseExpected = strings.ReplaceAll(responseExpected, "{BACKEND}", ts.URL)
cfgOrigP := authConfigData.Load()
if _, err := reloadAuthConfigData([]byte(cfgStr)); err != nil {
t.Fatalf("cannot load config data: %s", err)
}
defer func() {
cfgOrig := []byte("unauthorized_user:\n url_prefix: http://foo/bar")
if cfgOrigP != nil {
cfgOrig = *cfgOrigP
}
_, err := reloadAuthConfigData(cfgOrig)
if err != nil {
t.Fatalf("cannot load the original config: %s", err)
}
}()
w := &fakeResponseWriter{}
if !requestHandlerWithInternalRoutes(w, r) {
t.Fatalf("unexpected false is returned from requestHandler")
}
response := w.getResponse()
response = strings.ReplaceAll(response, "\r\n", "\n")
response = strings.TrimSpace(response)
responseExpected = strings.TrimSpace(responseExpected)
if response != responseExpected {
t.Fatalf("unexpected response\ngot\n%s\nwant\n%s", response, responseExpected)
}
}
simpleCfgStr := fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
url_prefix: {BACKEND}/foo`, string(publicKeyPEM))
noVMAccessClaimToken := genToken(t, nil, true)
defaultVMAccessClaimToken := genToken(t, map[string]any{
"exp": time.Now().Add(10 * time.Minute).Unix(),
"vm_access": map[string]any{},
}, true)
expiredToken := genToken(t, map[string]any{
"exp": 10,
"vm_access": map[string]any{},
}, true)
invalidSignatureToken := genToken(t, map[string]any{
"exp": time.Now().Add(10 * time.Minute).Unix(),
"vm_access": map[string]any{},
}, false)
// missing authorization
request := httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
responseExpected := `
statusCode=401
Www-Authenticate: Basic realm="Restricted"
missing 'Authorization' request header`
f(simpleCfgStr, request, responseExpected)
// token without vm_access claim
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
request.Header.Set(`Authorization`, `Bearer `+noVMAccessClaimToken)
responseExpected = `
statusCode=401
Unauthorized`
f(simpleCfgStr, request, responseExpected)
// expired token
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
request.Header.Set(`Authorization`, `Bearer `+expiredToken)
responseExpected = `
statusCode=401
Unauthorized`
f(simpleCfgStr, request, responseExpected)
// invalid signature token
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
request.Header.Set(`Authorization`, `Bearer `+invalidSignatureToken)
responseExpected = `
statusCode=401
Unauthorized`
f(simpleCfgStr, request, responseExpected)
// invalid signature token and skip verify
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
request.Header.Set(`Authorization`, `Bearer `+invalidSignatureToken)
responseExpected = `
statusCode=200
/foo/abc`
f(`
users:
- jwt:
skip_verify: true
url_prefix: {BACKEND}/foo`, request, responseExpected)
// token with default valid vm_access claim
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
request.Header.Set(`Authorization`, `Bearer `+defaultVMAccessClaimToken)
responseExpected = `
statusCode=200
/foo/abc`
f(simpleCfgStr, request, responseExpected)
// jwt token used but no matching user with JWT token in config
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
request.Header.Set(`Authorization`, `Bearer `+defaultVMAccessClaimToken)
responseExpected = `
statusCode=401
Unauthorized`
f(`
users:
- password: a-password
username: a-user
url_prefix: {BACKEND}/foo`, request, responseExpected)
// auth with key from file
publicKeyFile := filepath.Join(t.TempDir(), "a_public_key.pem")
if err := os.WriteFile(publicKeyFile, []byte(publicKeyPEM), 0o644); err != nil {
t.Fatalf("failed to write public key file: %s", err)
}
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
request.Header.Set(`Authorization`, `Bearer `+defaultVMAccessClaimToken)
responseExpected = `
statusCode=200
/foo/abc`
f(fmt.Sprintf(`
users:
- jwt:
public_key_files:
- %q
url_prefix: {BACKEND}/foo`, string(publicKeyFile)), request, responseExpected)
}
type fakeResponseWriter struct {
h http.Header
@@ -769,300 +546,28 @@ func (w *fakeResponseWriter) WriteHeader(statusCode int) {
}
}
// This is needed for net/http.ResponseController
func (w *fakeResponseWriter) SetReadDeadline(deadline time.Time) error {
return nil
}
func TestBufferRequestBody_Success(t *testing.T) {
defaultRequestBufferSize := requestBufferSize.String()
defer func() {
if err := requestBufferSize.Set(defaultRequestBufferSize); err != nil {
t.Fatalf("cannot reset requestBufferSize: %s", err)
}
}()
defaultMaxRequestBodySizeToRetry := maxRequestBodySizeToRetry.String()
defer func() {
if err := maxRequestBodySizeToRetry.Set(defaultMaxRequestBodySizeToRetry); err != nil {
t.Fatalf("cannot reset maxRequestBodySizeToRetry: %s", err)
}
}()
f := func(body *bytes.Buffer, requestBufferSizeFlag, maxRequestBodySizeToRetryFlag string) {
t.Helper()
expectedResponse := "statusCode=200"
if body.Len() > 0 {
expectedResponse += "\n" + body.String()
}
if err := requestBufferSize.Set(requestBufferSizeFlag); err != nil {
t.Fatalf("cannot set requestBufferSize: %s", err)
}
if err := maxRequestBodySizeToRetry.Set(maxRequestBodySizeToRetryFlag); err != nil {
t.Fatalf("cannot set maxRequestBodySizeToRetry: %s", err)
}
var backendCalled bool
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
backendCalled = true
b, err := io.ReadAll(r.Body)
if err != nil {
http.Error(w, fmt.Sprintf("cannot read body: %s", err), http.StatusBadRequest)
return
}
if _, err := w.Write(b); err != nil {
http.Error(w, fmt.Sprintf("cannot write body: %s", err), http.StatusInternalServerError)
return
}
}))
defer ts.Close()
// regular url_prefix
cfgStr := strings.ReplaceAll(`
unauthorized_user:
url_prefix: {BACKEND}/foo`, "{BACKEND}", ts.URL)
cfgOrigP := authConfigData.Load()
if _, err := reloadAuthConfigData([]byte(cfgStr)); err != nil {
t.Fatalf("cannot load config data: %s", err)
}
defer func() {
cfgOrig := []byte("unauthorized_user:\n url_prefix: http://foo/bar")
if cfgOrigP != nil {
cfgOrig = *cfgOrigP
}
_, err := reloadAuthConfigData(cfgOrig)
if err != nil {
t.Fatalf("cannot load the original config: %s", err)
}
}()
r, err := http.NewRequest(http.MethodPost, `http://some-host.com`, body)
if err != nil {
t.Fatalf("cannot initialize http request: %s", err)
}
w := &fakeResponseWriter{}
if !requestHandlerWithInternalRoutes(w, r) {
t.Fatalf("unexpected false is returned from requestHandler")
}
response := w.getResponse()
response = strings.ReplaceAll(response, "\r\n", "\n")
response = strings.TrimSpace(response)
if response != expectedResponse {
t.Fatalf("unexpected response\ngot\n%s\nwant\n%s", response, expectedResponse)
}
if !backendCalled {
t.Fatalf("backend is not called")
}
}
// no body, no buffering, no retry
f(bytes.NewBuffer(nil), "0", "0")
// no body, buffering on, no retry
f(bytes.NewBuffer(nil), "100", "0")
// no body, no buffering, retry on
f(bytes.NewBuffer(nil), "0", "100")
// no body, buffering on, retry on
f(bytes.NewBuffer(nil), "100", "100")
// body smaller than buffer, retry max on
f(bytes.NewBufferString(strings.Repeat("abcdf", 100)), "101", "101")
// body smaller than buffer
f(bytes.NewBufferString(strings.Repeat("abcdf", 100)), "501", "0")
// body same size as buffer
f(bytes.NewBufferString(strings.Repeat("abcdf", 100)), "500", "0")
// body bigger than a buffer
f(bytes.NewBufferString(strings.Repeat("abcdf", 100)), "499", "0")
// body bigger than tmpBuf 8KiB used in buffering
f(bytes.NewBufferString(strings.Repeat("a", 32*1024)), "16384", "")
f(bytes.NewBufferString(strings.Repeat("a", 32*1024)), "16385", "")
f(bytes.NewBufferString(strings.Repeat("a", 32*1024)), "16383", "")
}
func TestBufferRequestBody_Failure(t *testing.T) {
defaultRequestBufferSize := requestBufferSize.String()
defer func() {
if err := requestBufferSize.Set(defaultRequestBufferSize); err != nil {
t.Fatalf("cannot reset requestBufferSize: %s", err)
}
}()
defaultMaxRequestBodySizeToRetry := maxRequestBodySizeToRetry.String()
defer func() {
if err := maxRequestBodySizeToRetry.Set(defaultMaxRequestBodySizeToRetry); err != nil {
t.Fatalf("cannot reset maxRequestBodySizeToRetry: %s", err)
}
}()
defaultMaxQueueDuration := *maxQueueDuration
defer func() {
*maxQueueDuration = defaultMaxQueueDuration
}()
f := func(body *mockBody, expectedResponse string) {
t.Helper()
if err := maxRequestBodySizeToRetry.Set("0"); err != nil {
t.Fatalf("cannot set maxRequestBodySizeToRetry: %s", err)
}
if err := requestBufferSize.Set("2048"); err != nil {
t.Fatalf("cannot set requestBufferSize: %s", err)
}
*maxQueueDuration = 100 * time.Millisecond
var backendCalled bool
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
backendCalled = true
b, err := io.ReadAll(r.Body)
if err != nil {
http.Error(w, fmt.Sprintf("cannot read body: %s", err), http.StatusBadRequest)
return
}
if _, err := w.Write(b); err != nil {
http.Error(w, fmt.Sprintf("cannot write body: %s", err), http.StatusInternalServerError)
return
}
}))
defer ts.Close()
// regular url_prefix
cfgStr := strings.ReplaceAll(`
unauthorized_user:
url_prefix: {BACKEND}/foo`, "{BACKEND}", ts.URL)
cfgOrigP := authConfigData.Load()
if _, err := reloadAuthConfigData([]byte(cfgStr)); err != nil {
t.Fatalf("cannot load config data: %s", err)
}
defer func() {
cfgOrig := []byte("unauthorized_user:\n url_prefix: http://foo/bar")
if cfgOrigP != nil {
cfgOrig = *cfgOrigP
}
_, err := reloadAuthConfigData(cfgOrig)
if err != nil {
t.Fatalf("cannot load the original config: %s", err)
}
}()
r, err := http.NewRequest(http.MethodPost, `http://some-host.com`, body)
if err != nil {
t.Fatalf("cannot initialize http request: %s", err)
}
w := &fakeResponseWriter{}
if !requestHandlerWithInternalRoutes(w, r) {
t.Fatalf("unexpected false is returned from requestHandler")
}
response := w.getResponse()
response = strings.ReplaceAll(response, "\r\n", "\n")
response = strings.TrimSpace(response)
if response != expectedResponse {
t.Fatalf("unexpected response\ngot\n%s\nwant\n%s", response, expectedResponse)
}
if backendCalled {
t.Fatalf("backend is called")
}
}
// an error at the beginning of reading
f(&mockBody{err: fmt.Errorf("an error")}, `statusCode=400
cannot read request body: an error`)
// an error after reading 1024 bytes, buffer size is 2048 bytes
f(&mockBody{head: make([]byte, 1024), err: fmt.Errorf("an error")}, `statusCode=400
cannot read request body: an error`)
}
type mockBody struct {
head []byte
err error
tail []byte
}
func (r *mockBody) Read(p []byte) (n int, err error) {
if len(r.head) > 0 {
n = copy(p, r.head)
r.head = r.head[n:]
return n, nil
}
if r.err != nil {
return 0, r.err
}
if len(r.tail) > 0 {
n = copy(p, r.tail)
r.tail = r.tail[n:]
return n, nil
}
return 0, io.EOF
}
func TestBufferedBody_RetrySuccess(t *testing.T) {
func TestReadTrackingBody_RetrySuccess(t *testing.T) {
f := func(s string, maxBodySize int) {
t.Helper()
defaultRequestBufferSize := requestBufferSize.String()
defer func() {
if err := requestBufferSize.Set(defaultRequestBufferSize); err != nil {
t.Fatalf("cannot reset requestBufferSize: %s", err)
}
}()
if err := requestBufferSize.Set(fmt.Sprintf("%d", maxBodySize)); err != nil {
t.Fatalf("cannot set requestBufferSize: %s", err)
}
rtb := newReadTrackingBody(io.NopCloser(bytes.NewBufferString(s)), maxBodySize)
defaultMaxRequestBodySizeToRetry := maxRequestBodySizeToRetry.String()
defer func() {
if err := maxRequestBodySizeToRetry.Set(defaultMaxRequestBodySizeToRetry); err != nil {
t.Fatalf("cannot reset maxRequestBodySizeToRetry: %s", err)
}
}()
if err := maxRequestBodySizeToRetry.Set("0"); err != nil {
t.Fatalf("cannot set maxRequestBodySizeToRetry: %s", err)
}
ctx := context.Background()
rb, err := bufferRequestBody(ctx, io.NopCloser(bytes.NewBufferString(s)), "foo")
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
bb, ok := rb.(*bufferedBody)
canRetry := !ok || bb.canRetry()
if !canRetry {
if !rtb.canRetry() {
t.Fatalf("canRetry() must return true before reading anything")
}
for i := range 5 {
data, err := io.ReadAll(rb)
for i := 0; i < 5; i++ {
data, err := io.ReadAll(rtb)
if err != nil {
t.Fatalf("unexpected error when reading all the data at iteration %d: %s", i, err)
}
if string(data) != s {
t.Fatalf("unexpected data read at iteration %d\ngot\n%s\nwant\n%s", i, data, s)
}
if err := rb.Close(); err != nil {
t.Fatalf("unexpected error when closing bufferedBody at iteration %d: %s", i, err)
if err := rtb.Close(); err != nil {
t.Fatalf("unexpected error when closing readTrackingBody at iteration %d: %s", i, err)
}
if !rtb.canRetry() {
t.Fatalf("canRetry() must return true at iteration %d", i)
}
}
}
@@ -1072,48 +577,19 @@ func TestBufferedBody_RetrySuccess(t *testing.T) {
f("", 100)
f("foo", 100)
f("foobar", 100)
f(newTestString(1000), 1001)
f(newTestString(1000), 1000)
}
func TestBufferedBody_RetrySuccessPartialRead(t *testing.T) {
func TestReadTrackingBody_RetrySuccessPartialRead(t *testing.T) {
f := func(s string, maxBodySize int) {
t.Helper()
// Check the case with partial read
defaultRequestBufferSize := requestBufferSize.String()
defer func() {
if err := requestBufferSize.Set(defaultRequestBufferSize); err != nil {
t.Fatalf("cannot reset requestBufferSize: %s", err)
}
}()
if err := requestBufferSize.Set(fmt.Sprintf("%d", maxBodySize)); err != nil {
t.Fatalf("cannot set requestBufferSize: %s", err)
}
rtb := newReadTrackingBody(io.NopCloser(bytes.NewBufferString(s)), maxBodySize)
defaultMaxRequestBodySizeToRetry := maxRequestBodySizeToRetry.String()
defer func() {
if err := maxRequestBodySizeToRetry.Set(defaultMaxRequestBodySizeToRetry); err != nil {
t.Fatalf("cannot reset maxRequestBodySizeToRetry: %s", err)
}
}()
if err := maxRequestBodySizeToRetry.Set("0"); err != nil {
t.Fatalf("cannot set maxRequestBodySizeToRetry: %s", err)
}
ctx := context.Background()
rb, err := bufferRequestBody(ctx, io.NopCloser(bytes.NewBufferString(s)), "foo")
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
bb, ok := rb.(*bufferedBody)
canRetry := !ok || bb.canRetry()
if !canRetry {
t.Fatalf("canRetry must return true")
}
for i := range len(s) {
for i := 0; i < len(s); i++ {
buf := make([]byte, i)
n, err := io.ReadFull(rb, buf)
n, err := io.ReadFull(rtb, buf)
if err != nil {
t.Fatalf("unexpected error when reading %d bytes: %s", i, err)
}
@@ -1123,20 +599,26 @@ func TestBufferedBody_RetrySuccessPartialRead(t *testing.T) {
if string(buf) != s[:i] {
t.Fatalf("unexpected data read with the length %d\ngot\n%s\nwant\n%s", i, buf, s[:i])
}
if err := rb.Close(); err != nil {
if err := rtb.Close(); err != nil {
t.Fatalf("unexpected error when closing reader after reading %d bytes", i)
}
if !rtb.canRetry() {
t.Fatalf("canRetry() must return true after closing the reader after reading %d bytes", i)
}
}
data, err := io.ReadAll(rb)
data, err := io.ReadAll(rtb)
if err != nil {
t.Fatalf("unexpected error when reading all the data: %s", err)
}
if string(data) != s {
t.Fatalf("unexpected data read\ngot\n%s\nwant\n%s", data, s)
}
if err := rb.Close(); err != nil {
t.Fatalf("unexpected error when closing bufferedBody: %s", err)
if err := rtb.Close(); err != nil {
t.Fatalf("unexpected error when closing readTrackingBody: %s", err)
}
if !rtb.canRetry() {
t.Fatalf("canRetry() must return true after closing the reader after reading all the input")
}
}
@@ -1145,53 +627,30 @@ func TestBufferedBody_RetrySuccessPartialRead(t *testing.T) {
f("", 100)
f("foo", 100)
f("foobar", 100)
f(newTestString(1000), 1001)
f(newTestString(1000), 1000)
}
func TestBufferedBody_RetryFailureTooBigBody(t *testing.T) {
func TestReadTrackingBody_RetryFailureTooBigBody(t *testing.T) {
f := func(s string, maxBodySize int) {
t.Helper()
defaultRequestBufferSize := requestBufferSize.String()
defer func() {
if err := requestBufferSize.Set(defaultRequestBufferSize); err != nil {
t.Fatalf("cannot reset requestBufferSize: %s", err)
}
}()
if err := requestBufferSize.Set("0"); err != nil {
t.Fatalf("cannot set requestBufferSize: %s", err)
}
rtb := newReadTrackingBody(io.NopCloser(bytes.NewBufferString(s)), maxBodySize)
defaultMaxRequestBodySizeToRetry := maxRequestBodySizeToRetry.String()
defer func() {
if err := maxRequestBodySizeToRetry.Set(defaultMaxRequestBodySizeToRetry); err != nil {
t.Fatalf("cannot reset maxRequestBodySizeToRetry: %s", err)
}
}()
if err := maxRequestBodySizeToRetry.Set(fmt.Sprintf("%d", maxBodySize)); err != nil {
t.Fatalf("cannot set maxRequestBodySizeToRetry: %s", err)
}
ctx := context.Background()
rb, err := bufferRequestBody(ctx, io.NopCloser(bytes.NewBufferString(s)), "foo")
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
bb, ok := rb.(*bufferedBody)
canRetry := !ok || bb.canRetry()
if canRetry {
t.Fatalf("canRetry() must return false because of too big request body")
if !rtb.canRetry() {
t.Fatalf("canRetry() must return true before reading anything")
}
buf := make([]byte, 1)
n, err := io.ReadFull(rb, buf)
n, err := io.ReadFull(rtb, buf)
if err != nil {
t.Fatalf("unexpected error when reading a single byte: %s", err)
}
if n != 1 {
t.Fatalf("unexpected number of bytes read; got %d; want 1", n)
}
data, err := io.ReadAll(rb)
if !rtb.canRetry() {
t.Fatalf("canRetry() must return true after reading one byte")
}
data, err := io.ReadAll(rtb)
if err != nil {
t.Fatalf("unexpected error when reading all the data: %s", err)
}
@@ -1199,11 +658,14 @@ func TestBufferedBody_RetryFailureTooBigBody(t *testing.T) {
if dataRead != s {
t.Fatalf("unexpected data read\ngot\n%s\nwant\n%s", dataRead, s)
}
if err := rb.Close(); err != nil {
t.Fatalf("unexpected error when closing bufferedBody: %s", err)
if err := rtb.Close(); err != nil {
t.Fatalf("unexpected error when closing readTrackingBody: %s", err)
}
if rtb.canRetry() {
t.Fatalf("canRetry() must return false after closing the reader")
}
data, err = io.ReadAll(rb)
data, err = io.ReadAll(rtb)
if err == nil {
t.Fatalf("expecting non-nil error")
}
@@ -1217,48 +679,35 @@ func TestBufferedBody_RetryFailureTooBigBody(t *testing.T) {
f(newTestString(2*maxBodySize), maxBodySize)
}
func TestBufferedBody_RetryFailureZeroOrNegativeMaxBodySize(t *testing.T) {
func TestReadTrackingBody_RetryFailureZeroOrNegativeMaxBodySize(t *testing.T) {
f := func(s string, maxBodySize int) {
t.Helper()
defaultRequestBufferSize := requestBufferSize.String()
defer func() {
if err := requestBufferSize.Set(defaultRequestBufferSize); err != nil {
t.Fatalf("cannot reset requestBufferSize: %s", err)
}
}()
if err := requestBufferSize.Set(fmt.Sprintf("%d", maxBodySize)); err != nil {
t.Fatalf("cannot set requestBufferSize: %s", err)
}
rtb := newReadTrackingBody(io.NopCloser(bytes.NewBufferString(s)), maxBodySize)
ctx := context.Background()
rb, err := bufferRequestBody(ctx, io.NopCloser(bytes.NewBufferString(s)), "foo")
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
bb, ok := rb.(*bufferedBody)
canRetry := !ok || bb.canRetry()
if !canRetry {
if !rtb.canRetry() {
t.Fatalf("canRetry() must return true before reading anything")
}
data, err := io.ReadAll(rb)
data, err := io.ReadAll(rtb)
if err != nil {
t.Fatalf("unexpected error when reading all the data: %s", err)
}
if string(data) != s {
t.Fatalf("unexpected data read\ngot\n%s\nwant\n%s", data, s)
}
if err := rb.Close(); err != nil {
t.Fatalf("unexpected error when closing bufferedBody: %s", err)
if err := rtb.Close(); err != nil {
t.Fatalf("unexpected error when closing readTrackingBody: %s", err)
}
data, err = io.ReadAll(rb)
if err != nil {
t.Fatalf("unexpected error in io.ReadAll: %s", err)
if rtb.canRetry() {
t.Fatalf("canRetry() must return false after closing the reader")
}
if string(data) != s {
t.Fatalf("unexpected data read\ngot\n%s\nwant\n%s", data, s)
data, err = io.ReadAll(rtb)
if err == nil {
t.Fatalf("expecting non-nil error")
}
if len(data) != 0 {
t.Fatalf("unexpected non-empty data read: %q", data)
}
}

View File

@@ -174,7 +174,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
},
RetryStatusCodes: []int{503, 501},
LoadBalancingPolicy: "first_available",
DropSrcPathPrefixParts: new(2),
DropSrcPathPrefixParts: intp(2),
}, "/a/b/c", "http://foo.bar/c", `bb: aaa`, `x: y`, []int{503, 501}, "first_available", 2)
f(&UserInfo{
URLPrefix: mustParseURL("http://foo.bar/federate"),
@@ -219,13 +219,13 @@ func TestCreateTargetURLSuccess(t *testing.T) {
},
RetryStatusCodes: []int{503, 500, 501},
LoadBalancingPolicy: "first_available",
DropSrcPathPrefixParts: new(1),
DropSrcPathPrefixParts: intp(1),
},
{
SrcPaths: getRegexs([]string{"/api/v1/write"}),
URLPrefix: mustParseURL("http://vminsert/0/prometheus"),
RetryStatusCodes: []int{},
DropSrcPathPrefixParts: new(0),
DropSrcPathPrefixParts: intp(0),
},
{
SrcPaths: getRegexs([]string{"/metrics"}),
@@ -242,7 +242,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
},
},
RetryStatusCodes: []int{502},
DropSrcPathPrefixParts: new(2),
DropSrcPathPrefixParts: intp(2),
}
f(ui, "http://host42/vmsingle/api/v1/query?query=up&db=foo", "http://vmselect/0/prometheus/api/v1/query?db=foo&query=up",
"xx: aa\nyy: asdf", "qwe: rty", []int{503, 500, 501}, "first_available", 1)
@@ -259,7 +259,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
SrcPaths: getRegexs([]string{"/api/v1/write"}),
URLPrefix: mustParseURL("http://vminsert/0/prometheus"),
RetryStatusCodes: []int{},
DropSrcPathPrefixParts: new(0),
DropSrcPathPrefixParts: intp(0),
},
{
SrcPaths: getRegexs([]string{"/metrics/a/b"}),
@@ -275,7 +275,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
},
},
RetryStatusCodes: []int{502},
DropSrcPathPrefixParts: new(2),
DropSrcPathPrefixParts: intp(2),
}
f(ui, "https://foo-host/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "", "", []int{}, "least_loaded", 0)
f(ui, "https://foo-host/metrics/a/b", "http://metrics-server/b", "", "", []int{502}, "least_loaded", 2)

View File

@@ -212,7 +212,7 @@ func newSrcFS() (*fslocal.FS, error) {
}
func newDstFS(ctx context.Context) (common.RemoteFS, error) {
fs, err := actions.NewRemoteFS(ctx, *dst, nil)
fs, err := actions.NewRemoteFS(ctx, *dst)
if err != nil {
return nil, fmt.Errorf("cannot parse `-dst`=%q: %w", *dst, err)
}
@@ -255,7 +255,7 @@ func newOriginFS(ctx context.Context) (common.OriginFS, error) {
if len(*origin) == 0 {
return &fsnil.FS{}, nil
}
fs, err := actions.NewRemoteFS(ctx, *origin, nil)
fs, err := actions.NewRemoteFS(ctx, *origin)
if err != nil {
return nil, fmt.Errorf("cannot parse `-origin`=%q: %w", *origin, err)
}
@@ -266,7 +266,7 @@ func newRemoteOriginFS(ctx context.Context) (common.RemoteFS, error) {
if len(*origin) == 0 {
return nil, fmt.Errorf("-origin cannot be empty when -snapshotName and -snapshot.createURL aren't set")
}
fs, err := actions.NewRemoteFS(ctx, *origin, nil)
fs, err := actions.NewRemoteFS(ctx, *origin)
if err != nil {
return nil, fmt.Errorf("cannot parse `-origin`=%q: %w", *origin, err)
}

View File

@@ -7,8 +7,6 @@ import (
"math"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
@@ -47,7 +45,7 @@ func New(retries int, factor float64, minDuration time.Duration) (*Backoff, erro
// Retry process retries until all attempts are completed
func (b *Backoff) Retry(ctx context.Context, cb retryableFunc) (uint64, error) {
var attempt uint64
for i := range b.retries {
for i := 0; i < b.retries; i++ {
err := cb()
if err == nil {
return attempt, nil
@@ -57,7 +55,6 @@ func (b *Backoff) Retry(ctx context.Context, cb retryableFunc) (uint64, error) {
return attempt, err // fail fast if not recoverable
}
attempt++
retriesTotal.Inc()
backoff := float64(b.minDuration) * math.Pow(b.factor, float64(i))
dur := time.Duration(backoff)
logger.Errorf("got error: %s on attempt: %d; will retry in %v", err, attempt, dur)
@@ -77,7 +74,3 @@ func (b *Backoff) Retry(ctx context.Context, cb retryableFunc) (uint64, error) {
}
return attempt, fmt.Errorf("execution failed after %d retry attempts", b.retries)
}
var (
retriesTotal = metrics.NewCounter(`vmctl_backoff_retries_total`)
)

View File

@@ -14,12 +14,6 @@ const (
globalSilent = "s"
globalVerbose = "verbose"
globalDisableProgressBar = "disable-progress-bar"
globalPushMetricsURL = "pushmetrics.url"
globalPushMetricsInterval = "pushmetrics.interval"
globalPushExtraLabels = "pushmetrics.extraLabel"
globalPushHeaders = "pushmetrics.header"
globalPushDisableCompression = "pushmetrics.disableCompression"
)
var (
@@ -39,29 +33,6 @@ var (
Value: false,
Usage: "Whether to disable progress bar during the import.",
},
&cli.StringSliceFlag{
Name: globalPushMetricsURL,
Usage: "Optional URL to push metrics. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#push-metrics",
},
&cli.DurationFlag{
Name: globalPushMetricsInterval,
Value: 10 * time.Second,
Usage: "Interval for pushing metrics to every -pushmetrics.url",
},
&cli.StringSliceFlag{
Name: globalPushExtraLabels,
Usage: "Extra labels to add to pushed metrics. In case of collision, label value defined by flag will have priority. " +
"Flag can be set multiple times, to add few additional labels. " +
"For example, -pushmetrics.extraLabel='instance=\"foo\"' adds instance=\"foo\" label to all the metrics pushed to every -pushmetrics.url",
},
&cli.StringSliceFlag{
Name: globalPushHeaders,
Usage: "Optional HTTP headers to add to pushed metrics. Flag can be set multiple times, to add few additional headers.",
},
&cli.BoolFlag{
Name: globalPushDisableCompression,
Usage: "Whether to disable compression when pushing metrics.",
},
}
)
@@ -152,32 +123,32 @@ var (
Name: vmExtraLabel,
Value: nil,
Usage: "Extra labels, that will be added to imported timeseries. In case of collision, label value defined by flag" +
" will have priority. Flag can be set multiple times, to add few additional labels.",
"will have priority. Flag can be set multiple times, to add few additional labels.",
},
&cli.Int64Flag{
Name: vmRateLimit,
Usage: "Optional data transfer rate limit in bytes per second.\n" +
"By default, the rate limit is disabled. It can be useful for limiting load on configured via '--vm-addr' destination.",
"By default, the rate limit is disabled. It can be useful for limiting load on configured via '--vmAddr' destination.",
},
&cli.StringFlag{
Name: vmCertFile,
Usage: "Optional path to client-side TLS certificate file to use when connecting to '--vm-addr'",
Usage: "Optional path to client-side TLS certificate file to use when connecting to '--vmAddr'",
},
&cli.StringFlag{
Name: vmKeyFile,
Usage: "Optional path to client-side TLS key to use when connecting to '--vm-addr'",
Usage: "Optional path to client-side TLS key to use when connecting to '--vmAddr'",
},
&cli.StringFlag{
Name: vmCAFile,
Usage: "Optional path to TLS CA file to use for verifying connections to '--vm-addr'. By default, system CA is used",
Usage: "Optional path to TLS CA file to use for verifying connections to '--vmAddr'. By default, system CA is used",
},
&cli.StringFlag{
Name: vmServerName,
Usage: "Optional TLS server name to use for connections to '--vm-addr'. By default, the server name from '--vm-addr' is used",
Usage: "Optional TLS server name to use for connections to '--vmAddr'. By default, the server name from '--vmAddr' is used",
},
&cli.BoolFlag{
Name: vmInsecureSkipVerify,
Usage: "Whether to skip tls verification when connecting to '--vm-addr'",
Usage: "Whether to skip tls verification when connecting to '--vmAddr'",
Value: false,
},
&cli.IntFlag{
@@ -497,7 +468,7 @@ var (
Name: vmNativeFilterMatch,
Usage: "Time series selector to match series for export. For example, select {instance!=\"localhost\"} will " +
"match all series with \"instance\" label different to \"localhost\".\n" +
" See more details here https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-data-in-native-format",
" See more details here https://github.com/VictoriaMetrics/VictoriaMetrics#how-to-export-data-in-native-format",
Value: `{__name__!=""}`,
},
&cli.StringFlag{
@@ -627,7 +598,7 @@ var (
Name: vmExtraLabel,
Value: nil,
Usage: "Extra labels, that will be added to imported timeseries. In case of collision, label value defined by flag" +
" will have priority. Flag can be set multiple times, to add few additional labels.",
"will have priority. Flag can be set multiple times, to add few additional labels.",
},
&cli.Int64Flag{
Name: vmRateLimit,
@@ -654,8 +625,8 @@ var (
&cli.BoolFlag{
Name: vmNativeDisableBinaryProtocol,
Usage: "Whether to use https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-data-in-json-line-format " +
"instead of https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-data-in-native-format API. " +
"Binary export/import API protocol implies less network and resource usage, as it transfers compressed binary data blocks. " +
"instead of https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-data-in-native-format API." +
"Binary export/import API protocol implies less network and resource usage, as it transfers compressed binary data blocks." +
"Non-binary export/import API is less efficient, but supports deduplication if it is configured on vm-native-src-addr side.",
Value: false,
},

View File

@@ -1,14 +1,11 @@
package main
import (
"context"
"fmt"
"io"
"log"
"sync"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/influx"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
@@ -40,7 +37,7 @@ func newInfluxProcessor(ic *influx.Client, im *vm.Importer, cc int, separator st
}
}
func (ip *influxProcessor) run(ctx context.Context) error {
func (ip *influxProcessor) run() error {
series, err := ip.ic.Explore()
if err != nil {
return fmt.Errorf("explore query failed: %s", err)
@@ -50,11 +47,10 @@ func (ip *influxProcessor) run(ctx context.Context) error {
}
question := fmt.Sprintf("Found %d timeseries to import. Continue?", len(series))
if !prompt(ctx, question) {
if !prompt(question) {
return nil
}
influxSeriesTotal.Add(len(series))
bar := barpool.AddWithTemplate(fmt.Sprintf(barTpl, "Processing series"), len(series))
if err := barpool.Start(); err != nil {
return err
@@ -66,18 +62,18 @@ func (ip *influxProcessor) run(ctx context.Context) error {
ip.im.ResetStats()
var wg sync.WaitGroup
for range ip.cc {
wg.Go(func() {
wg.Add(ip.cc)
for i := 0; i < ip.cc; i++ {
go func() {
defer wg.Done()
for s := range seriesCh {
if err := ip.do(s); err != nil {
influxErrorsTotal.Inc()
errCh <- fmt.Errorf("request failed for %q.%q: %s", s.Measurement, s.Field, err)
return
}
influxSeriesProcessed.Inc()
bar.Increment()
}
})
}()
}
// any error breaks the import
@@ -86,7 +82,6 @@ func (ip *influxProcessor) run(ctx context.Context) error {
case infErr := <-errCh:
return fmt.Errorf("influx error: %s", infErr)
case vmErr := <-ip.im.Errors():
influxErrorsTotal.Inc()
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, ip.isVerbose))
case seriesCh <- s:
}
@@ -99,7 +94,6 @@ func (ip *influxProcessor) run(ctx context.Context) error {
// drain import errors channel
for vmErr := range ip.im.Errors() {
if vmErr.Err != nil {
influxErrorsTotal.Inc()
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, ip.isVerbose))
}
}
@@ -174,9 +168,3 @@ func (ip *influxProcessor) do(s *influx.Series) error {
}
}
}
var (
influxSeriesTotal = metrics.NewCounter(`vmctl_influx_migration_series_total`)
influxSeriesProcessed = metrics.NewCounter(`vmctl_influx_migration_series_processed`)
influxErrorsTotal = metrics.NewCounter(`vmctl_influx_migration_errors_total`)
)

View File

@@ -4,8 +4,6 @@ import (
"sync"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
)
@@ -47,16 +45,9 @@ func (l *Limiter) Register(dataLen int) {
t := timerpool.Get(d)
<-t.C
timerpool.Put(t)
limiterThrottleEventsTotal.Inc()
}
l.budget += limit
l.deadline = time.Now().Add(time.Second)
}
l.budget -= int64(dataLen)
limiterBytesProcessed.Add(dataLen)
}
var (
limiterBytesProcessed = metrics.NewCounter(`vmctl_limiter_bytes_processed_total`)
limiterThrottleEventsTotal = metrics.NewCounter(`vmctl_limiter_throttle_events_total`)
)

View File

@@ -2,7 +2,6 @@ package main
import (
"context"
"flag"
"fmt"
"log"
"net/http"
@@ -20,9 +19,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/native"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/remoteread"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/influx"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/opentsdb"
@@ -44,20 +41,11 @@ func main() {
ctx, cancelCtx := context.WithCancel(context.Background())
start := time.Now()
beforeFn := func(c *cli.Context) error {
flag.Parse()
logger.Init()
isSilent = c.Bool(globalSilent)
if c.Bool(globalDisableProgressBar) {
barpool.Disable(true)
}
netutil.EnableIPv6()
pushmetrics.InitWith(&pushmetrics.Config{
URLs: c.StringSlice(globalPushMetricsURL),
Interval: c.Duration(globalPushMetricsInterval),
ExtraLabels: c.StringSlice(globalPushExtraLabels),
DisableCompression: c.Bool(globalPushDisableCompression),
Headers: c.StringSlice(globalPushHeaders),
})
return nil
}
app := &cli.App{
@@ -115,7 +103,7 @@ func main() {
}
otsdbProcessor := newOtsdbProcessor(otsdbClient, importer, c.Int(otsdbConcurrency), c.Bool(globalVerbose))
return otsdbProcessor.run(ctx)
return otsdbProcessor.run()
},
},
{
@@ -176,7 +164,7 @@ func main() {
c.Bool(influxSkipDatabaseLabel),
c.Bool(influxPrometheusMode),
c.Bool(globalVerbose))
return processor.run(ctx)
return processor.run()
},
},
{
@@ -291,7 +279,7 @@ func main() {
cc: c.Int(promConcurrency),
isVerbose: c.Bool(globalVerbose),
}
return pp.run(ctx)
return pp.run()
},
},
{
@@ -463,7 +451,6 @@ func main() {
log.Fatalln(err)
}
log.Printf("Total time: %v", time.Since(start))
pushmetrics.StopAndPush()
}
func initConfigVM(c *cli.Context) (vm.Config, error) {

View File

@@ -8,8 +8,6 @@ import (
"net/http"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/auth"
)
@@ -38,15 +36,12 @@ type Response struct {
// Explore finds metric names by provided filter from api/v1/label/__name__/values
func (c *Client) Explore(ctx context.Context, f Filter, tenantID string, start, end time.Time) ([]string, error) {
startTime := time.Now()
exploreRequestsTotal.Inc()
url := fmt.Sprintf("%s/%s", c.Addr, nativeMetricNamesAddr)
if tenantID != "" {
url = fmt.Sprintf("%s/select/%s/prometheus/%s", c.Addr, tenantID, nativeMetricNamesAddr)
}
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
exploreRequestsErrorsTotal.Inc()
return nil, fmt.Errorf("cannot create request to %q: %s", url, err)
}
@@ -58,53 +53,37 @@ func (c *Client) Explore(ctx context.Context, f Filter, tenantID string, start,
resp, err := c.do(req, http.StatusOK)
if err != nil {
exploreRequestsErrorsTotal.Inc()
exploreDuration.UpdateDuration(startTime)
return nil, fmt.Errorf("series request failed: %s", err)
}
var response Response
if err := json.NewDecoder(resp.Body).Decode(&response); err != nil {
exploreRequestsErrorsTotal.Inc()
exploreDuration.UpdateDuration(startTime)
return nil, fmt.Errorf("cannot decode series response: %s", err)
}
exploreDuration.UpdateDuration(startTime)
return response.MetricNames, resp.Body.Close()
}
// ImportPipe uses pipe reader in request to process data
func (c *Client) ImportPipe(ctx context.Context, dstURL string, pr *io.PipeReader) error {
startTime := time.Now()
importRequestsTotal.Inc()
req, err := http.NewRequestWithContext(ctx, http.MethodPost, dstURL, pr)
if err != nil {
importRequestsErrorsTotal.Inc()
return fmt.Errorf("cannot create import request to %q: %s", c.Addr, err)
}
importResp, err := c.do(req, http.StatusNoContent)
if err != nil {
importRequestsErrorsTotal.Inc()
importDuration.UpdateDuration(startTime)
return fmt.Errorf("import request failed: %s", err)
}
if err := importResp.Body.Close(); err != nil {
importRequestsErrorsTotal.Inc()
importDuration.UpdateDuration(startTime)
return fmt.Errorf("cannot close import response body: %s", err)
}
importDuration.UpdateDuration(startTime)
return nil
}
// ExportPipe makes request by provided filter and return io.ReadCloser which can be used to get data
func (c *Client) ExportPipe(ctx context.Context, url string, f Filter) (io.ReadCloser, error) {
startTime := time.Now()
exportRequestsTotal.Inc()
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
exportRequestsErrorsTotal.Inc()
return nil, fmt.Errorf("cannot create request to %q: %s", c.Addr, err)
}
@@ -123,11 +102,8 @@ func (c *Client) ExportPipe(ctx context.Context, url string, f Filter) (io.ReadC
resp, err := c.do(req, http.StatusOK)
if err != nil {
exportRequestsErrorsTotal.Inc()
exportDuration.UpdateDuration(startTime)
return nil, fmt.Errorf("export request failed: %w", err)
}
exportDuration.UpdateDuration(startTime)
return resp.Body, nil
}
@@ -186,16 +162,3 @@ func (c *Client) do(req *http.Request, expSC int) (*http.Response, error) {
}
return resp, err
}
var (
importRequestsTotal = metrics.NewCounter(`vmctl_vm_native_requests_total{type="import"}`)
exportRequestsTotal = metrics.NewCounter(`vmctl_vm_native_requests_total{type="export"}`)
exploreRequestsTotal = metrics.NewCounter(`vmctl_vm_native_requests_total{type="explore"}`)
importRequestsErrorsTotal = metrics.NewCounter(`vmctl_vm_native_request_errors_total{type="import"}`)
exportRequestsErrorsTotal = metrics.NewCounter(`vmctl_vm_native_request_errors_total{type="export"}`)
exploreRequestsErrorsTotal = metrics.NewCounter(`vmctl_vm_native_request_errors_total{type="explore"}`)
importDuration = metrics.NewHistogram(`vmctl_vm_native_import_duration_seconds`)
exportDuration = metrics.NewHistogram(`vmctl_vm_native_export_duration_seconds`)
exploreDuration = metrics.NewHistogram(`vmctl_vm_native_explore_duration_seconds`)
)

View File

@@ -1,14 +1,11 @@
package main
import (
"context"
"fmt"
"log"
"sync"
"time"
vmetrics "github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/opentsdb"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
"github.com/cheggaaa/pb/v3"
@@ -40,7 +37,7 @@ func newOtsdbProcessor(oc *opentsdb.Client, im *vm.Importer, otsdbcc int, verbos
}
}
func (op *otsdbProcessor) run(ctx context.Context) error {
func (op *otsdbProcessor) run() error {
log.Println("Loading all metrics from OpenTSDB for filters: ", op.oc.Filters)
var metrics []string
for _, filter := range op.oc.Filters {
@@ -56,10 +53,9 @@ func (op *otsdbProcessor) run(ctx context.Context) error {
}
question := fmt.Sprintf("Found %d metrics to import. Continue?", len(metrics))
if !prompt(ctx, question) {
if !prompt(question) {
return nil
}
op.im.ResetStats()
var startTime int64
if op.oc.HardTS != 0 {
@@ -87,24 +83,23 @@ func (op *otsdbProcessor) run(ctx context.Context) error {
seriesCh := make(chan queryObj, op.otsdbcc)
errCh := make(chan error)
// we're going to make serieslist * queryRanges queries, so we should represent that in the progress bar
otsdbSeriesTotal.Add(len(serieslist) * queryRanges)
bar := pb.StartNew(len(serieslist) * queryRanges)
defer func(bar *pb.ProgressBar) {
bar.Finish()
}(bar)
var wg sync.WaitGroup
for range op.otsdbcc {
wg.Go(func() {
wg.Add(op.otsdbcc)
for i := 0; i < op.otsdbcc; i++ {
go func() {
defer wg.Done()
for s := range seriesCh {
if err := op.do(s); err != nil {
otsdbErrorsTotal.Inc()
errCh <- fmt.Errorf("couldn't retrieve series for %s : %s", metric, err)
return
}
otsdbSeriesProcessed.Inc()
bar.Increment()
}
})
}()
}
/*
Loop through all series for this metric, processing all retentions and time ranges
@@ -121,7 +116,6 @@ func (op *otsdbProcessor) run(ctx context.Context) error {
case otsdbErr := <-errCh:
return fmt.Errorf("opentsdb error: %s", otsdbErr)
case vmErr := <-op.im.Errors():
otsdbErrorsTotal.Inc()
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, op.isVerbose))
case seriesCh <- queryObj{
Tr: tr, StartTime: startTime,
@@ -146,7 +140,6 @@ func (op *otsdbProcessor) run(ctx context.Context) error {
op.im.Close()
for vmErr := range op.im.Errors() {
if vmErr.Err != nil {
otsdbErrorsTotal.Inc()
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, op.isVerbose))
}
}
@@ -177,9 +170,3 @@ func (op *otsdbProcessor) do(s queryObj) error {
}
return op.im.Input(&ts)
}
var (
otsdbSeriesTotal = vmetrics.NewCounter(`vmctl_opentsdb_migration_series_total`)
otsdbSeriesProcessed = vmetrics.NewCounter(`vmctl_opentsdb_migration_series_processed`)
otsdbErrorsTotal = vmetrics.NewCounter(`vmctl_opentsdb_migration_errors_total`)
)

View File

@@ -109,7 +109,7 @@ func (c Client) FindMetrics(q string) ([]string, error) {
return nil, fmt.Errorf("failed to send GET request to %q: %s", q, err)
}
if resp.StatusCode != 200 {
return nil, fmt.Errorf("bad return from OpenTSDB: %d: %v", resp.StatusCode, resp)
return nil, fmt.Errorf("bad return from OpenTSDB: %q: %v", resp.StatusCode, resp)
}
defer func() { _ = resp.Body.Close() }()
body, err := io.ReadAll(resp.Body)
@@ -133,7 +133,7 @@ func (c Client) FindSeries(metric string) ([]Meta, error) {
return nil, fmt.Errorf("failed to set GET request to %q: %s", q, err)
}
if resp.StatusCode != 200 {
return nil, fmt.Errorf("bad return from OpenTSDB: %d: %v", resp.StatusCode, resp)
return nil, fmt.Errorf("bad return from OpenTSDB: %q: %v", resp.StatusCode, resp)
}
defer func() { _ = resp.Body.Close() }()
body, err := io.ReadAll(resp.Body)

View File

@@ -1,18 +1,13 @@
package main
import (
"context"
"fmt"
"log"
"strings"
"sync"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/tsdb"
"github.com/prometheus/prometheus/tsdb/chunkenc"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
@@ -35,7 +30,7 @@ type prometheusProcessor struct {
isVerbose bool
}
func (pp *prometheusProcessor) run(ctx context.Context) error {
func (pp *prometheusProcessor) run() error {
blocks, err := pp.cl.Explore()
if err != nil {
return fmt.Errorf("explore failed: %s", err)
@@ -44,7 +39,7 @@ func (pp *prometheusProcessor) run(ctx context.Context) error {
return fmt.Errorf("found no blocks to import")
}
question := fmt.Sprintf("Found %d blocks to import. Continue?", len(blocks))
if !prompt(ctx, question) {
if !prompt(question) {
return nil
}
@@ -65,19 +60,19 @@ func (pp *prometheusProcessor) do(b tsdb.BlockReader) error {
var it chunkenc.Iterator
for ss.Next() {
var name string
var labelPairs []vm.LabelPair
var labels []vm.LabelPair
series := ss.At()
series.Labels().Range(func(label labels.Label) {
for _, label := range series.Labels() {
if label.Name == "__name__" {
name = label.Value
return
continue
}
labelPairs = append(labelPairs, vm.LabelPair{
Name: strings.Clone(label.Name),
Value: strings.Clone(label.Value),
labels = append(labels, vm.LabelPair{
Name: label.Name,
Value: label.Value,
})
})
}
if name == "" {
return fmt.Errorf("failed to find `__name__` label in labelset for block %v", b.Meta().ULID)
}
@@ -103,7 +98,7 @@ func (pp *prometheusProcessor) do(b tsdb.BlockReader) error {
}
ts := vm.TimeSeries{
Name: name,
LabelPairs: labelPairs,
LabelPairs: labels,
Timestamps: timestamps,
Values: values,
}
@@ -115,7 +110,6 @@ func (pp *prometheusProcessor) do(b tsdb.BlockReader) error {
}
func (pp *prometheusProcessor) processBlocks(blocks []tsdb.BlockReader) error {
promBlocksTotal.Add(len(blocks))
bar := barpool.AddWithTemplate(fmt.Sprintf(barTpl, "Processing blocks"), len(blocks))
if err := barpool.Start(); err != nil {
return err
@@ -127,18 +121,18 @@ func (pp *prometheusProcessor) processBlocks(blocks []tsdb.BlockReader) error {
pp.im.ResetStats()
var wg sync.WaitGroup
for range pp.cc {
wg.Go(func() {
wg.Add(pp.cc)
for i := 0; i < pp.cc; i++ {
go func() {
defer wg.Done()
for br := range blockReadersCh {
if err := pp.do(br); err != nil {
promErrorsTotal.Inc()
errCh <- fmt.Errorf("read failed for block %q: %s", br.Meta().ULID, err)
return
}
promBlocksProcessed.Inc()
bar.Increment()
}
})
}()
}
// any error breaks the import
for _, br := range blocks {
@@ -148,7 +142,6 @@ func (pp *prometheusProcessor) processBlocks(blocks []tsdb.BlockReader) error {
return fmt.Errorf("prometheus error: %s", promErr)
case vmErr := <-pp.im.Errors():
close(blockReadersCh)
promErrorsTotal.Inc()
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, pp.isVerbose))
case blockReadersCh <- br:
}
@@ -162,7 +155,6 @@ func (pp *prometheusProcessor) processBlocks(blocks []tsdb.BlockReader) error {
// drain import errors channel
for vmErr := range pp.im.Errors() {
if vmErr.Err != nil {
promErrorsTotal.Inc()
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, pp.isVerbose))
}
}
@@ -172,9 +164,3 @@ func (pp *prometheusProcessor) processBlocks(blocks []tsdb.BlockReader) error {
return nil
}
var (
promBlocksTotal = metrics.NewCounter(`vmctl_prometheus_migration_blocks_total`)
promBlocksProcessed = metrics.NewCounter(`vmctl_prometheus_migration_blocks_processed`)
promErrorsTotal = metrics.NewCounter(`vmctl_prometheus_migration_errors_total`)
)

View File

@@ -7,8 +7,6 @@ import (
"sync"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/remoteread"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/stepper"
@@ -49,11 +47,10 @@ func (rrp *remoteReadProcessor) run(ctx context.Context) error {
question := fmt.Sprintf("Selected time range %q - %q will be split into %d ranges according to %q step. Continue?",
rrp.filter.timeStart.String(), rrp.filter.timeEnd.String(), len(ranges), rrp.filter.chunk)
if !prompt(ctx, question) {
if !prompt(question) {
return nil
}
remoteReadRangesTotal.Add(len(ranges))
bar := barpool.AddWithTemplate(fmt.Sprintf(barTpl, "Processing ranges"), len(ranges))
if err := barpool.Start(); err != nil {
return err
@@ -69,18 +66,18 @@ func (rrp *remoteReadProcessor) run(ctx context.Context) error {
errCh := make(chan error)
var wg sync.WaitGroup
for range rrp.cc {
wg.Go(func() {
wg.Add(rrp.cc)
for i := 0; i < rrp.cc; i++ {
go func() {
defer wg.Done()
for r := range rangeC {
if err := rrp.do(ctx, r); err != nil {
remoteReadErrorsTotal.Inc()
errCh <- fmt.Errorf("request failed for: %s", err)
return
}
remoteReadRangesProcessed.Inc()
bar.Increment()
}
})
}()
}
for _, r := range ranges {
@@ -88,7 +85,6 @@ func (rrp *remoteReadProcessor) run(ctx context.Context) error {
case infErr := <-errCh:
return fmt.Errorf("remote read error: %s", infErr)
case vmErr := <-rrp.dst.Errors():
remoteReadErrorsTotal.Inc()
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, rrp.isVerbose))
case rangeC <- &remoteread.Filter{
StartTimestampMs: r[0].UnixMilli(),
@@ -104,7 +100,6 @@ func (rrp *remoteReadProcessor) run(ctx context.Context) error {
// drain import errors channel
for vmErr := range rrp.dst.Errors() {
if vmErr.Err != nil {
remoteReadErrorsTotal.Inc()
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, rrp.isVerbose))
}
}
@@ -125,9 +120,3 @@ func (rrp *remoteReadProcessor) do(ctx context.Context, filter *remoteread.Filte
return nil
})
}
var (
remoteReadRangesTotal = metrics.NewCounter(`vmctl_remote_read_migration_ranges_total`)
remoteReadRangesProcessed = metrics.NewCounter(`vmctl_remote_read_migration_ranges_processed`)
remoteReadErrorsTotal = metrics.NewCounter(`vmctl_remote_read_migration_errors_total`)
)

View File

@@ -2,7 +2,6 @@ package main
import (
"bufio"
"context"
"fmt"
"os"
"strings"
@@ -16,7 +15,7 @@ const barTpl = `{{ blue "%s:" }} {{ counters . }} {{ bar . "[" "█" (cycle . "
// isSilent should be inited in main
var isSilent bool
func prompt(ctx context.Context, question string) bool {
func prompt(question string) bool {
if isSilent {
return true
}
@@ -26,32 +25,15 @@ func prompt(ctx context.Context, question string) bool {
}
reader := bufio.NewReader(os.Stdin)
fmt.Print(question, " [Y/n] ")
answerCh := make(chan string, 1)
errCh := make(chan error, 1)
go func() {
answer, err := reader.ReadString('\n')
if err != nil {
errCh <- err
return
}
answerCh <- answer
}()
select {
case <-ctx.Done():
fmt.Println("\nCanceled.")
return false
case err := <-errCh:
answer, err := reader.ReadString('\n')
if err != nil {
panic(err)
case answer := <-answerCh:
answer = strings.TrimSpace(strings.ToLower(answer))
if answer == "" || answer == "yes" || answer == "y" {
return true
}
return false
}
answer = strings.TrimSpace(strings.ToLower(answer))
if answer == "" || answer == "yes" || answer == "y" {
return true
}
return false
}
func wrapErr(vmErr *vm.ImportError, verbose bool) error {

View File

@@ -76,11 +76,11 @@ func (ts *TimeSeries) write(w io.Writer) (int, error) {
pointsCount := len(timestampsBatch)
cw.printf(`},"timestamps":[`)
for i := range pointsCount - 1 {
for i := 0; i < pointsCount-1; i++ {
cw.printf(`%d,`, timestampsBatch[i])
}
cw.printf(`%d],"values":[`, timestampsBatch[pointsCount-1])
for i := range pointsCount - 1 {
for i := 0; i < pointsCount-1; i++ {
cw.printf(`%v,`, valuesBatch[i])
}
cw.printf("%v]}\n", valuesBatch[pointsCount-1])

View File

@@ -12,8 +12,6 @@ import (
"sync"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/backoff"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/limiter"
@@ -82,12 +80,6 @@ type Importer struct {
s *stats
backoff *backoff.Backoff
importRequestsTotal *metrics.Counter
importRequestsErrorsTotal *metrics.Counter
importSamplesTotal *metrics.Counter
importBytesTotal *metrics.Counter
importDuration *metrics.Histogram
}
// ResetStats resets im stats.
@@ -155,12 +147,6 @@ func NewImporter(ctx context.Context, cfg Config) (*Importer, error) {
input: make(chan *TimeSeries, cfg.Concurrency*4),
errors: make(chan *ImportError, cfg.Concurrency),
backoff: cfg.Backoff,
importRequestsTotal: metrics.GetOrCreateCounter(`vmctl_importer_requests_total`),
importRequestsErrorsTotal: metrics.GetOrCreateCounter(`vmctl_importer_request_errors_total`),
importSamplesTotal: metrics.GetOrCreateCounter(`vmctl_importer_samples_total`),
importBytesTotal: metrics.GetOrCreateCounter(`vmctl_importer_bytes_total`),
importDuration: metrics.GetOrCreateHistogram(`vmctl_importer_request_duration_seconds`),
}
if err := im.Ping(); err != nil {
return nil, fmt.Errorf("ping to %q failed: %s", addr, err)
@@ -170,13 +156,15 @@ func NewImporter(ctx context.Context, cfg Config) (*Importer, error) {
cfg.BatchSize = 1e5
}
for i := range int(cfg.Concurrency) {
im.wg.Add(int(cfg.Concurrency))
for i := 0; i < int(cfg.Concurrency); i++ {
pbPrefix := fmt.Sprintf(`{{ green "VM worker %d:" }}`, i)
bar := barpool.AddWithTemplate(pbPrefix+pbTpl, 0)
im.wg.Go(func() {
go func(bar barpool.Bar) {
defer im.wg.Done()
im.startWorker(ctx, bar, cfg.BatchSize, cfg.SignificantFigures, cfg.RoundDigits)
})
}(bar)
}
im.ResetStats()
return im, nil
@@ -325,13 +313,9 @@ func (im *Importer) Import(tsBatch []*TimeSeries) error {
return nil
}
startTime := time.Now()
im.importRequestsTotal.Inc()
pr, pw := io.Pipe()
req, err := http.NewRequest(http.MethodPost, im.importPath, pr)
if err != nil {
im.importRequestsErrorsTotal.Inc()
return fmt.Errorf("cannot create request to %q: %s", im.addr, err)
}
if im.user != "" {
@@ -351,7 +335,6 @@ func (im *Importer) Import(tsBatch []*TimeSeries) error {
if im.compress {
zw, err := gzip.NewWriterLevel(w, 1)
if err != nil {
im.importRequestsErrorsTotal.Inc()
return fmt.Errorf("unexpected error when creating gzip writer: %s", err)
}
w = zw
@@ -363,39 +346,29 @@ func (im *Importer) Import(tsBatch []*TimeSeries) error {
for _, ts := range tsBatch {
n, err := ts.write(bw)
if err != nil {
im.importRequestsErrorsTotal.Inc()
return fmt.Errorf("write err: %w", err)
}
totalBytes += n
totalSamples += len(ts.Values)
}
if err := bw.Flush(); err != nil {
im.importRequestsErrorsTotal.Inc()
return err
}
if closer, ok := w.(io.Closer); ok {
err := closer.Close()
if err != nil {
im.importRequestsErrorsTotal.Inc()
return err
}
}
if err := pw.Close(); err != nil {
im.importRequestsErrorsTotal.Inc()
return err
}
requestErr := <-errCh
if requestErr != nil {
im.importRequestsErrorsTotal.Inc()
im.importDuration.UpdateDuration(startTime)
return fmt.Errorf("import request error for %q: %w", im.addr, requestErr)
}
im.importSamplesTotal.Add(totalSamples)
im.importBytesTotal.Add(totalBytes)
im.importDuration.UpdateDuration(startTime)
im.s.Lock()
im.s.bytes += uint64(totalBytes)
im.s.samples += uint64(totalSamples)

View File

@@ -9,8 +9,6 @@ import (
"sync"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/backoff"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/limiter"
@@ -81,22 +79,16 @@ func (p *vmNativeProcessor) run(ctx context.Context) error {
return fmt.Errorf("failed to get tenants: %w", err)
}
question := fmt.Sprintf("The following tenants were discovered: %s.\n Continue?", tenants)
if !prompt(ctx, question) {
if !prompt(question) {
return nil
}
migrationTenantsTotal.Set(uint64(len(tenants)))
}
for _, tenantID := range tenants {
err := p.runBackfilling(ctx, tenantID, ranges)
if err != nil {
migrationErrorsTotal.Inc()
return fmt.Errorf("migration failed: %s", err)
}
if p.interCluster {
migrationTenantsProcessed.Inc()
}
}
log.Println("Import finished!")
@@ -164,7 +156,6 @@ func (p *vmNativeProcessor) runSingle(ctx context.Context, f native.Filter, srcU
p.s.bytes += uint64(written)
p.s.requests++
p.s.Unlock()
migrationBytesTransferredTotal.AddInt64(written)
if err := pw.Close(); err != nil {
return err
@@ -208,7 +199,7 @@ func (p *vmNativeProcessor) runBackfilling(ctx context.Context, tenantID string,
var foundSeriesMsg string
var requestsToMake int
var metricsMap = map[string][][]time.Time{
var metrics = map[string][][]time.Time{
"": ranges,
}
@@ -220,11 +211,11 @@ func (p *vmNativeProcessor) runBackfilling(ctx context.Context, tenantID string,
if !p.disablePerMetricRequests {
format = fmt.Sprintf(nativeWithBackoffTpl, barPrefix)
metricsMap, err = p.explore(ctx, p.src, tenantID, ranges)
metrics, err = p.explore(ctx, p.src, tenantID, ranges)
if err != nil {
return fmt.Errorf("failed to explore metric names: %s", err)
}
if len(metricsMap) == 0 {
if len(metrics) == 0 {
errMsg := "no metrics found"
if tenantID != "" {
errMsg = fmt.Sprintf("%s for tenant id: %s", errMsg, tenantID)
@@ -232,28 +223,23 @@ func (p *vmNativeProcessor) runBackfilling(ctx context.Context, tenantID string,
log.Println(errMsg)
return nil
}
for _, m := range metricsMap {
for _, m := range metrics {
requestsToMake += len(m)
}
foundSeriesMsg = fmt.Sprintf("Found %d unique metric names to import. Total import/export requests to make %d", len(metricsMap), requestsToMake)
migrationMetricsTotal.Add(len(metricsMap))
} else {
requestsToMake = len(ranges)
foundSeriesMsg = fmt.Sprintf("Found %d unique metric names to import. Total import/export requests to make %d", len(metrics), requestsToMake)
}
if !p.interCluster {
// do not prompt for intercluster because there could be many tenants,
// and we don't want to interrupt the process when moving to the next tenant.
question := foundSeriesMsg + ". Continue?"
if !prompt(ctx, question) {
if !prompt(question) {
return nil
}
} else {
log.Print(foundSeriesMsg)
}
migrationRequestsPlanned.Add(requestsToMake)
bar := barpool.NewSingleProgress(format, requestsToMake)
bar.Start()
defer bar.Finish()
@@ -262,8 +248,10 @@ func (p *vmNativeProcessor) runBackfilling(ctx context.Context, tenantID string,
errCh := make(chan error, p.cc)
var wg sync.WaitGroup
for range p.cc {
wg.Go(func() {
for i := 0; i < p.cc; i++ {
wg.Add(1)
go func() {
defer wg.Done()
for f := range filterCh {
if !p.disablePerMetricRequests {
if err := p.do(ctx, f, srcURL, dstURL, nil); err != nil {
@@ -277,13 +265,12 @@ func (p *vmNativeProcessor) runBackfilling(ctx context.Context, tenantID string,
return
}
}
migrationRequestsCompleted.Inc()
}
})
}()
}
// any error breaks the import
for mName, mRanges := range metricsMap {
for mName, mRanges := range metrics {
match, err := buildMatchWithFilter(p.filter.Match, mName)
if err != nil {
logger.Errorf("failed to build filter %q for metric name %q: %s", p.filter.Match, mName, err)
@@ -303,9 +290,6 @@ func (p *vmNativeProcessor) runBackfilling(ctx context.Context, tenantID string,
}:
}
}
if !p.disablePerMetricRequests {
migrationMetricsProcessed.Inc()
}
}
close(filterCh)
@@ -414,18 +398,3 @@ func buildMatchWithFilter(filter string, metricName string) (string, error) {
match := "{" + strings.Join(filters, " or ") + "}"
return match, nil
}
var (
migrationMetricsTotal = metrics.NewCounter(`vmctl_vm_native_migration_metrics_total`)
migrationMetricsProcessed = metrics.NewCounter(`vmctl_vm_native_migration_metrics_processed`)
migrationRequestsPlanned = metrics.NewCounter(`vmctl_vm_native_migration_requests_planned`)
migrationRequestsCompleted = metrics.NewCounter(`vmctl_vm_native_migration_requests_completed`)
migrationErrorsTotal = metrics.NewCounter(`vmctl_vm_native_migration_errors_total`)
migrationTenantsTotal = metrics.NewCounter(`vmctl_vm_native_migration_tenants_total`)
migrationTenantsProcessed = metrics.NewCounter(`vmctl_vm_native_migration_tenants_processed`)
migrationBytesTransferredTotal = metrics.NewCounter(`vmctl_vm_native_migration_bytes_transferred_total`)
)

View File

@@ -11,11 +11,9 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
)
@@ -52,9 +50,8 @@ var (
type InsertCtx struct {
Labels sortedLabels
mrs []storage.MetricRow
mms []metricsmetadata.Row
metricNameBuf []byte
mrs []storage.MetricRow
metricNamesBuf []byte
relabelCtx relabel.Ctx
streamAggrCtx streamAggrCtx
@@ -76,13 +73,8 @@ func (ctx *InsertCtx) Reset(rowsLen int) {
}
mrs = slicesutil.SetLength(mrs, rowsLen)
ctx.mrs = mrs[:0]
mms := ctx.mms
for i := range mms {
cleanMetricMetadata(&mms[i])
}
ctx.mms = mms[:0]
ctx.metricNameBuf = ctx.metricNameBuf[:0]
ctx.metricNamesBuf = ctx.metricNamesBuf[:0]
ctx.relabelCtx.Reset()
ctx.streamAggrCtx.Reset()
ctx.skipStreamAggr = false
@@ -92,20 +84,11 @@ func cleanMetricRow(mr *storage.MetricRow) {
mr.MetricNameRaw = nil
}
func cleanMetricMetadata(mm *metricsmetadata.Row) {
mm.MetricFamilyName = nil
mm.Unit = nil
mm.Help = nil
mm.Type = 0
mm.ProjectID = 0
mm.AccountID = 0
}
func (ctx *InsertCtx) marshalMetricNameRaw(prefix []byte, labels []prompb.Label) []byte {
start := len(ctx.metricNameBuf)
ctx.metricNameBuf = append(ctx.metricNameBuf, prefix...)
ctx.metricNameBuf = storage.MarshalMetricNameRaw(ctx.metricNameBuf, labels)
metricNameRaw := ctx.metricNameBuf[start:]
start := len(ctx.metricNamesBuf)
ctx.metricNamesBuf = append(ctx.metricNamesBuf, prefix...)
ctx.metricNamesBuf = storage.MarshalMetricNameRaw(ctx.metricNamesBuf, labels)
metricNameRaw := ctx.metricNamesBuf[start:]
return metricNameRaw[:len(metricNameRaw):len(metricNameRaw)]
}
@@ -160,7 +143,7 @@ func (ctx *InsertCtx) addRow(metricNameRaw []byte, timestamp int64, value float6
mr.MetricNameRaw = metricNameRaw
mr.Timestamp = timestamp
mr.Value = value
if len(ctx.metricNameBuf) > 16*1024*1024 {
if len(ctx.metricNamesBuf) > 16*1024*1024 {
if err := ctx.FlushBufs(); err != nil {
return err
}
@@ -168,57 +151,6 @@ func (ctx *InsertCtx) addRow(metricNameRaw []byte, timestamp int64, value float6
return nil
}
// WriteMetadata writes given prometheus protobuf metadata into the storage.
func (ctx *InsertCtx) WriteMetadata(mmpbs []prompb.MetricMetadata) error {
if len(mmpbs) == 0 {
return nil
}
mms := ctx.mms
mms = slicesutil.SetLength(mms, len(mmpbs))
for idx, mmpb := range mmpbs {
mm := &mms[idx]
mm.MetricFamilyName = bytesutil.ToUnsafeBytes(mmpb.MetricFamilyName)
mm.Help = bytesutil.ToUnsafeBytes(mmpb.Help)
mm.Type = mmpb.Type
mm.Unit = bytesutil.ToUnsafeBytes(mmpb.Unit)
}
ctx.mms = mms
err := vmstorage.AddMetadataRows(mms)
if err != nil {
return &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("cannot store metrics metadata: %w", err),
StatusCode: http.StatusServiceUnavailable,
}
}
return nil
}
// WritePromMetadata writes given prometheus metric metadata into the storage
func (ctx *InsertCtx) WritePromMetadata(mmps []prometheus.Metadata) error {
if len(mmps) == 0 {
return nil
}
mms := ctx.mms
mms = slicesutil.SetLength(mms, len(mmps))
for idx, mmpb := range mmps {
mm := &mms[idx]
mm.MetricFamilyName = bytesutil.ToUnsafeBytes(mmpb.Metric)
mm.Help = bytesutil.ToUnsafeBytes(mmpb.Help)
mm.Type = mmpb.Type
}
ctx.mms = mms
err := vmstorage.AddMetadataRows(mms)
if err != nil {
return &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("cannot store prometheus metrics metadata: %w", err),
StatusCode: http.StatusServiceUnavailable,
}
}
return nil
}
// AddLabelBytes adds (name, value) label to ctx.Labels.
//
// name and value must exist until ctx.Labels is used.

View File

@@ -111,7 +111,9 @@ func InitStreamAggr() {
saCfgTimestamp.Set(fasttime.UnixTimestamp())
// Start config reloader.
saCfgReloaderWG.Go(func() {
saCfgReloaderWG.Add(1)
go func() {
defer saCfgReloaderWG.Done()
for {
select {
case <-sighupCh:
@@ -120,7 +122,7 @@ func InitStreamAggr() {
}
reloadStreamAggrConfig()
}
})
}()
}
func reloadStreamAggrConfig() {

View File

@@ -27,7 +27,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/promremotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/relabel"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/vmimport"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/zabbixconnector"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
@@ -232,17 +231,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
}
firehose.WriteSuccessResponse(w, r)
return true
case "/zabbixconnector/api/v1/history":
zabbixconnectorHistoryRequests.Inc()
if err := zabbixconnector.InsertHandlerForHTTP(r); err != nil {
zabbixconnectorHistoryErrors.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintf(w, `{"error":%q}`, err.Error())
return true
}
w.WriteHeader(http.StatusOK)
return true
case "/newrelic":
newrelicCheckRequest.Inc()
w.Header().Set("Content-Type", "application/json")
@@ -435,9 +423,6 @@ var (
opentelemetryPushRequests = metrics.NewCounter(`vm_http_requests_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
opentelemetryPushErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
zabbixconnectorHistoryRequests = metrics.NewCounter(`vm_http_requests_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
zabbixconnectorHistoryErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
newrelicWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
newrelicWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)

View File

@@ -6,7 +6,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/relabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/firehose"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/stream"
@@ -15,9 +14,8 @@ import (
)
var (
rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="opentelemetry"}`)
rowsPerInsert = metrics.NewHistogram(`vm_rows_per_insert{type="opentelemetry"}`)
metadataInserted = metrics.NewCounter(`vm_metadata_rows_inserted_total{type="opentelemetry"}`)
rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="opentelemetry"}`)
rowsPerInsert = metrics.NewHistogram(`vm_rows_per_insert{type="opentelemetry"}`)
)
// InsertHandler processes opentelemetry metrics.
@@ -35,12 +33,12 @@ func InsertHandler(req *http.Request) error {
return fmt.Errorf("json encoding isn't supported for opentelemetry format. Use protobuf encoding")
}
}
return stream.ParseStream(req.Body, encoding, processBody, func(tss []prompb.TimeSeries, mms []prompb.MetricMetadata) error {
return insertRows(tss, mms, extraLabels)
return stream.ParseStream(req.Body, encoding, processBody, func(tss []prompb.TimeSeries, _ []prompb.MetricMetadata) error {
return insertRows(tss, extraLabels)
})
}
func insertRows(tss []prompb.TimeSeries, mms []prompb.MetricMetadata, extraLabels []prompb.Label) error {
func insertRows(tss []prompb.TimeSeries, extraLabels []prompb.Label) error {
ctx := common.GetInsertCtx()
defer common.PutInsertCtx(ctx)
@@ -77,14 +75,5 @@ func insertRows(tss []prompb.TimeSeries, mms []prompb.MetricMetadata, extraLabel
}
rowsInserted.Add(rowsTotal)
rowsPerInsert.Update(float64(rowsTotal))
if err := ctx.FlushBufs(); err != nil {
return fmt.Errorf("cannot flush metric bufs: %w", err)
}
if prommetadata.IsEnabled() {
if err := ctx.WriteMetadata(mms); err != nil {
return err
}
metadataInserted.Add(len(mms))
}
return nil
return ctx.FlushBufs()
}

View File

@@ -1,7 +1,6 @@
package prometheusimport
import (
"fmt"
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
@@ -16,9 +15,8 @@ import (
)
var (
rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="prometheus"}`)
rowsPerInsert = metrics.NewHistogram(`vm_rows_per_insert{type="prometheus"}`)
metadataInserted = metrics.NewCounter(`vm_metadata_rows_inserted_total{type="prometheus"}`)
rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="prometheus"}`)
rowsPerInsert = metrics.NewHistogram(`vm_rows_per_insert{type="prometheus"}`)
)
// InsertHandler processes `/api/v1/import/prometheus` request.
@@ -32,14 +30,14 @@ func InsertHandler(req *http.Request) error {
return err
}
encoding := req.Header.Get("Content-Encoding")
return stream.Parse(req.Body, defaultTimestamp, encoding, true, prommetadata.IsEnabled(), func(rows []prometheus.Row, mms []prometheus.Metadata) error {
return insertRows(rows, mms, extraLabels)
return stream.Parse(req.Body, defaultTimestamp, encoding, true, prommetadata.IsEnabled(), func(rows []prometheus.Row, _ []prometheus.Metadata) error {
return insertRows(rows, extraLabels)
}, func(s string) {
httpserver.LogError(req, s)
})
}
func insertRows(rows []prometheus.Row, mms []prometheus.Metadata, extraLabels []prompb.Label) error {
func insertRows(rows []prometheus.Row, extraLabels []prompb.Label) error {
ctx := common.GetInsertCtx()
defer common.PutInsertCtx(ctx)
@@ -66,15 +64,5 @@ func insertRows(rows []prometheus.Row, mms []prometheus.Metadata, extraLabels []
}
rowsInserted.Add(len(rows))
rowsPerInsert.Update(float64(len(rows)))
if err := ctx.FlushBufs(); err != nil {
return fmt.Errorf("cannot flush metric bufs: %w", err)
}
if prommetadata.IsEnabled() {
if err := ctx.WritePromMetadata(mms); err != nil {
return err
}
metadataInserted.Add(len(mms))
}
return nil
return ctx.FlushBufs()
}

View File

@@ -4,15 +4,13 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/relabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="promscrape"}`)
rowsPerInsert = metrics.NewHistogram(`vm_rows_per_insert{type="promscrape"}`)
metadataRowsInserted = metrics.NewCounter(`vm_metadata_rows_inserted_total{type="promscrape"}`)
rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="promscrape"}`)
rowsPerInsert = metrics.NewHistogram(`vm_rows_per_insert{type="promscrape"}`)
)
const maxRowsPerBlock = 10000
@@ -43,13 +41,6 @@ func Push(wr *prompb.WriteRequest) {
}
push(ctx, tssBlock)
}
if prommetadata.IsEnabled() {
if err := ctx.WriteMetadata(wr.Metadata); err != nil {
logger.Errorf("cannot write promscrape metrics metadata to storage: %s", err)
} else {
metadataRowsInserted.Add(len(wr.Metadata))
}
}
}
func push(ctx *common.InsertCtx, tss []prompb.TimeSeries) {

View File

@@ -1,12 +1,10 @@
package promremotewrite
import (
"fmt"
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/relabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
@@ -14,9 +12,8 @@ import (
)
var (
rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="promremotewrite"}`)
rowsPerInsert = metrics.NewHistogram(`vm_rows_per_insert{type="promremotewrite"}`)
metadataInserted = metrics.NewCounter(`vm_metadata_rows_inserted_total{type="promremotewrite"}`)
rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="promremotewrite"}`)
rowsPerInsert = metrics.NewHistogram(`vm_rows_per_insert{type="promremotewrite"}`)
)
// InsertHandler processes remote write for prometheus.
@@ -26,12 +23,12 @@ func InsertHandler(req *http.Request) error {
return err
}
isVMRemoteWrite := req.Header.Get("Content-Encoding") == "zstd"
return stream.Parse(req.Body, isVMRemoteWrite, func(tss []prompb.TimeSeries, mms []prompb.MetricMetadata) error {
return insertRows(tss, mms, extraLabels)
return stream.Parse(req.Body, isVMRemoteWrite, func(tss []prompb.TimeSeries, _ []prompb.MetricMetadata) error {
return insertRows(tss, extraLabels)
})
}
func insertRows(timeseries []prompb.TimeSeries, mms []prompb.MetricMetadata, extraLabels []prompb.Label) error {
func insertRows(timeseries []prompb.TimeSeries, extraLabels []prompb.Label) error {
ctx := common.GetInsertCtx()
defer common.PutInsertCtx(ctx)
@@ -71,15 +68,5 @@ func insertRows(timeseries []prompb.TimeSeries, mms []prompb.MetricMetadata, ext
}
rowsInserted.Add(rowsTotal)
rowsPerInsert.Update(float64(rowsTotal))
if err := ctx.FlushBufs(); err != nil {
return fmt.Errorf("cannot flush metric bufs: %w", err)
}
if prommetadata.IsEnabled() {
if err := ctx.WriteMetadata(mms); err != nil {
return err
}
metadataInserted.Add(len(mms))
}
return nil
return ctx.FlushBufs()
}

View File

@@ -1,67 +0,0 @@
package zabbixconnector
import (
"net/http"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/relabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector/stream"
)
var (
rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="zabbixconnector"}`)
rowsPerInsert = metrics.NewHistogram(`vm_rows_per_insert{type="zabbixconnector"}`)
)
// InsertHandlerForHTTP processes remote write for ZabbixConnector POST /zabbixconnector/v1/history request.
func InsertHandlerForHTTP(req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)
if err != nil {
return err
}
encoding := req.Header.Get("Content-Encoding")
return stream.Parse(req.Body, encoding, func(rows []zabbixconnector.Row) error {
return insertRows(rows, extraLabels)
})
}
func insertRows(rows []zabbixconnector.Row, extraLabels []prompb.Label) error {
ctx := common.GetInsertCtx()
defer common.PutInsertCtx(ctx)
rowsTotal := len(rows)
ctx.Reset(rowsTotal)
hasRelabeling := relabel.HasRelabeling()
for i := range rows {
r := &rows[i]
ctx.Labels = ctx.Labels[:0]
for k := range r.Tags {
t := &r.Tags[k]
ctx.AddLabelBytes(t.Key, t.Value)
}
for k := range extraLabels {
label := &extraLabels[k]
ctx.AddLabel(label.Name, label.Value)
}
if hasRelabeling {
ctx.ApplyRelabeling()
}
if len(ctx.Labels) == 0 {
// Skip metric without labels.
continue
}
ctx.SortLabelsIfNeeded()
if err := ctx.WriteDataPoint(nil, ctx.Labels, r.Timestamp, r.Value); err != nil {
return err
}
}
rowsInserted.Add(rowsTotal)
rowsPerInsert.Update(float64(rowsTotal))
return ctx.FlushBufs()
}

View File

@@ -104,7 +104,7 @@ func newDstFS() (*fslocal.FS, error) {
}
func newSrcFS(ctx context.Context) (common.RemoteFS, error) {
fs, err := actions.NewRemoteFS(ctx, *src, nil)
fs, err := actions.NewRemoteFS(ctx, *src)
if err != nil {
return nil, fmt.Errorf("cannot parse `-src`=%q: %w", *src, err)
}

View File

@@ -142,7 +142,7 @@ type aggrStatePercentile struct {
func newAggrStatePercentile(pointsLen int, n float64) aggrState {
hs := make([]*histogram.Fast, pointsLen)
for i := range pointsLen {
for i := 0; i < pointsLen; i++ {
hs[i] = histogram.NewFast()
}
return &aggrStatePercentile{

View File

@@ -9,7 +9,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/searchutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
@@ -50,7 +49,7 @@ func (ec *evalConfig) newTimestamps(step int64) []int64 {
pointsLen := ec.pointsLen(step)
timestamps := make([]int64, pointsLen)
ts := ec.startTime
for i := range pointsLen {
for i := 0; i < pointsLen; i++ {
timestamps[i] = ts
ts += step
}
@@ -197,17 +196,12 @@ func newNextSeriesForSearchQuery(ec *evalConfig, sq *storage.SearchQuery, expr g
pathExpression: safePathExpression(expr),
}
s.summarize(aggrAvg, ec.startTime, ec.endTime, ec.storageStep, 0)
// A negative or zero duration will cause timer.C to return immediately
remainingTimeout := ec.deadline.Deadline() - fasttime.UnixTimestamp()
t := timerpool.Get(time.Duration(remainingTimeout) * time.Second)
t := timerpool.Get(30 * time.Second)
defer timerpool.Put(t)
select {
case seriesCh <- s:
case <-t.C:
logger.Errorf("reached timeout when processing the %s (full query: %s), it can be due to the amount of storageNodes configured in vmselect is more than vmselects available CPU count "+
"or vmselect is heavy loaded. Consider adding resources or increasing `-search.maxQueryDuration` or `timeout` parameter in the query.",
logger.Errorf("resource leak when processing the %s (full query: %s); please report this error to VictoriaMetrics developers",
expr.AppendString(nil), ec.originalQuery)
}
return nil

View File

@@ -25,7 +25,7 @@ func naturalLess(a, b string) bool {
}
func getNonNumPrefix(s string) (prefix string, tail string) {
for i := range len(s) {
for i := 0; i < len(s); i++ {
ch := s[i]
if ch >= '0' && ch <= '9' {
return s[:i], s[i:]

View File

@@ -82,7 +82,7 @@ func RenderHandler(startTime time.Time, w http.ResponseWriter, r *http.Request)
if s := r.FormValue("maxDataPoints"); len(s) > 0 {
n, err := strconv.ParseFloat(s, 64)
if err != nil {
return fmt.Errorf("cannot parse maxDataPoints=%d: %w", maxDataPoints, err)
return fmt.Errorf("cannot parse maxDataPoints=%q: %w", maxDataPoints, err)
}
if n <= 0 {
return fmt.Errorf("maxDataPoints must be greater than 0; got %f", n)
@@ -209,7 +209,7 @@ func parseInterval(s string) (int64, error) {
s = strings.TrimSpace(s)
prefix := s
var suffix string
for i := range len(s) {
for i := 0; i < len(s); i++ {
ch := s[i]
if ch != '-' && ch != '+' && ch != '.' && (ch < '0' || ch > '9') {
prefix = s[:i]

View File

@@ -1228,7 +1228,7 @@ func transformDelay(ec *evalConfig, fe *graphiteql.FuncExpr) (nextSeriesFunc, er
stepsLocal = len(values)
}
copy(values[stepsLocal:], values[:len(values)-stepsLocal])
for i := range stepsLocal {
for i := 0; i < stepsLocal; i++ {
values[i] = nan
}
}
@@ -1740,7 +1740,7 @@ func transformGroup(ec *evalConfig, fe *graphiteql.FuncExpr) (nextSeriesFunc, er
func groupSeriesLists(ec *evalConfig, args []*graphiteql.ArgExpr, expr graphiteql.Expr) (nextSeriesFunc, error) {
var nextSeriess []nextSeriesFunc
for i := range args {
for i := 0; i < len(args); i++ {
nextSeries, err := evalSeriesList(ec, args, "seriesList", i)
if err != nil {
for _, f := range nextSeriess {
@@ -3233,7 +3233,7 @@ func transformSeriesByTag(ec *evalConfig, fe *graphiteql.FuncExpr) (nextSeriesFu
return nil, fmt.Errorf("at least one tagExpression must be passed to seriesByTag")
}
var tagExpressions []string
for i := range args {
for i := 0; i < len(args); i++ {
te, err := getString(args, "tagExpressions", i)
if err != nil {
return nil, err
@@ -3633,7 +3633,7 @@ var graphiteToGolangRe = regexp.MustCompile(`\\(\d+)`)
func getNodes(args []*graphiteql.ArgExpr) ([]graphiteql.Expr, error) {
var nodes []graphiteql.Expr
for i := range args {
for i := 0; i < len(args); i++ {
expr := args[i].Expr
switch expr.(type) {
case *graphiteql.NumberExpr, *graphiteql.StringExpr:
@@ -3896,9 +3896,27 @@ func nextSeriesConcurrentWrapper(nextSeries nextSeriesFunc, f func(s *series) (*
seriesCh := make(chan *series, goroutines)
errCh := make(chan error, 1)
var wg sync.WaitGroup
wg.Add(goroutines)
go func() {
var err error
for {
s, e := nextSeries()
if e != nil || s == nil {
err = e
break
}
seriesCh <- s
}
close(seriesCh)
wg.Wait()
close(resultCh)
errCh <- err
close(errCh)
}()
var skipProcessing atomic.Bool
for range goroutines {
wg.Go(func() {
for i := 0; i < goroutines; i++ {
go func() {
defer wg.Done()
for s := range seriesCh {
if skipProcessing.Load() {
continue
@@ -3916,24 +3934,8 @@ func nextSeriesConcurrentWrapper(nextSeries nextSeriesFunc, f func(s *series) (*
}
}
}
})
}()
}
go func() {
var err error
for {
s, e := nextSeries()
if e != nil || s == nil {
err = e
break
}
seriesCh <- s
}
close(seriesCh)
wg.Wait()
close(resultCh)
errCh <- err
close(errCh)
}()
wrapper := func() (*series, error) {
r := <-resultCh
if r == nil {
@@ -4052,7 +4054,7 @@ func formatPathsFromSeriesExpressions(seriesExpressions []string, sortPaths bool
func newNaNSeries(ec *evalConfig, step int64) *series {
values := make([]float64, ec.pointsLen(step))
for i := range values {
for i := 0; i < len(values); i++ {
values[i] = nan
}
return &series{
@@ -5244,7 +5246,7 @@ func transformLinearRegression(ec *evalConfig, fe *graphiteql.FuncExpr) (nextSer
func linearRegressionForSeries(ec *evalConfig, fe *graphiteql.FuncExpr, ss, sourceSeries []*series) (nextSeriesFunc, error) {
var resp []*series
for i := range ss {
for i := 0; i < len(ss); i++ {
source := sourceSeries[i]
s := ss[i]
s.Tags["linearRegressions"] = fmt.Sprintf("%d, %d", ec.startTime/1e3, ec.endTime/1e3)
@@ -5258,7 +5260,7 @@ func linearRegressionForSeries(ec *evalConfig, fe *graphiteql.FuncExpr, ss, sour
continue
}
values := s.Values
for j := range values {
for j := 0; j < len(values); j++ {
values[j] = offset + (float64(int(s.Timestamps[0])+j*int(s.step)))*factor
}
resp = append(resp, s)
@@ -5370,7 +5372,7 @@ func holtWinterConfidenceBands(ec *evalConfig, fe *graphiteql.FuncExpr, args []*
valuesLen := len(forecastValues)
upperBand := make([]float64, 0, valuesLen)
lowerBand := make([]float64, 0, valuesLen)
for i := range valuesLen {
for i := 0; i < valuesLen; i++ {
forecastItem := forecastValues[i]
deviationItem := deviationValues[i]
if math.IsNaN(forecastItem) || math.IsNaN(deviationItem) {
@@ -5464,7 +5466,7 @@ func transformHoltWintersAberration(ec *evalConfig, fe *graphiteql.FuncExpr) (ne
return nil, fmt.Errorf("bug, len mismatch for series: %d and upperBand values: %d or lowerBand values: %d", len(values), len(upperBand), len(lowerBand))
}
aberration := make([]float64, 0, len(values))
for i := range values {
for i := 0; i < len(values); i++ {
v := values[i]
upperValue := upperBand[i]
lowerValue := lowerBand[i]

View File

@@ -280,7 +280,7 @@ func isMetricExprChar(ch byte) bool {
}
func appendEscapedIdent(dst []byte, s string) []byte {
for i := range len(s) {
for i := 0; i < len(s); i++ {
ch := s[i]
if isIdentChar(ch) || isMetricExprChar(ch) {
if i == 0 && !isFirstIdentChar(ch) {

View File

@@ -421,16 +421,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
}
w.WriteHeader(http.StatusNoContent)
return true
case "/api/v1/metadata":
// Return dumb placeholder for https://prometheus.io/docs/prometheus/latest/querying/api/#querying-metric-metadata
metadataRequests.Inc()
if err := prometheus.MetadataHandler(qt, startTime, w, r); err != nil {
metadataErrors.Inc()
httpserver.SendPrometheusError(w, r, err)
return true
}
return true
default:
return false
}
@@ -520,7 +510,7 @@ func handleStaticAndSimpleRequests(w http.ResponseWriter, r *http.Request, path
fmt.Fprintf(w, "%s", `{"status":"error","msg":"for accessing vmalert flag '-vmalert.proxyURL' must be configured"}`)
return true
}
proxyVMAlertRequests(w, r, path)
proxyVMAlertRequests(w, r)
return true
}
@@ -558,7 +548,7 @@ func handleStaticAndSimpleRequests(w http.ResponseWriter, r *http.Request, path
case "/api/v1/rules", "/rules":
rulesRequests.Inc()
if len(*vmalertProxyURL) > 0 {
proxyVMAlertRequests(w, r, path)
proxyVMAlertRequests(w, r)
return true
}
// Return dumb placeholder for https://prometheus.io/docs/prometheus/latest/querying/api/#rules
@@ -568,7 +558,7 @@ func handleStaticAndSimpleRequests(w http.ResponseWriter, r *http.Request, path
case "/api/v1/alerts", "/alerts":
alertsRequests.Inc()
if len(*vmalertProxyURL) > 0 {
proxyVMAlertRequests(w, r, path)
proxyVMAlertRequests(w, r)
return true
}
// Return dumb placeholder for https://prometheus.io/docs/prometheus/latest/querying/api/#alerts
@@ -578,12 +568,18 @@ func handleStaticAndSimpleRequests(w http.ResponseWriter, r *http.Request, path
case "/api/v1/notifiers", "/notifiers":
notifiersRequests.Inc()
if len(*vmalertProxyURL) > 0 {
proxyVMAlertRequests(w, r, path)
proxyVMAlertRequests(w, r)
return true
}
w.Header().Set("Content-Type", "application/json")
fmt.Fprint(w, `{"status":"success","data":{"notifiers":[]}}`)
return true
case "/api/v1/metadata":
// Return dumb placeholder for https://prometheus.io/docs/prometheus/latest/querying/api/#querying-metric-metadata
metadataRequests.Inc()
w.Header().Set("Content-Type", "application/json")
fmt.Fprintf(w, "%s", `{"status":"success","data":{}}`)
return true
case "/api/v1/status/buildinfo":
buildInfoRequests.Inc()
w.Header().Set("Content-Type", "application/json")
@@ -712,9 +708,7 @@ var (
alertsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/alerts"}`)
notifiersRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/notifiers"}`)
metadataRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/metadata"}`)
metadataErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/metadata"}`)
metadataRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/metadata"}`)
buildInfoRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/buildinfo"}`)
queryExemplarsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/query_exemplars"}`)
@@ -725,7 +719,7 @@ var (
metricNamesStatsResetErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/admin/status/metric_names_stats/reset"}`)
)
func proxyVMAlertRequests(w http.ResponseWriter, r *http.Request, path string) {
func proxyVMAlertRequests(w http.ResponseWriter, r *http.Request) {
defer func() {
err := recover()
if err == nil || err == http.ErrAbortHandler {
@@ -736,10 +730,8 @@ func proxyVMAlertRequests(w http.ResponseWriter, r *http.Request, path string) {
// Forward other panics to the caller.
panic(err)
}()
req := r.Clone(r.Context())
req.URL.Path = strings.TrimPrefix(path, "prometheus")
req.Host = vmalertProxyHost
vmalertProxy.ServeHTTP(w, req)
r.Host = vmalertProxyHost
vmalertProxy.ServeHTTP(w, r)
}
var (

View File

@@ -5,8 +5,6 @@ import (
"errors"
"flag"
"fmt"
"math"
"slices"
"sort"
"sync"
"sync/atomic"
@@ -22,7 +20,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
)
var (
@@ -298,12 +295,14 @@ func (rss *Results) runParallel(qt *querytracer.Tracer, f func(rs *Result, worke
// Start workers and wait until they finish the work.
var wg sync.WaitGroup
for workerID := range workChs {
qtChild := qt.NewChild("worker #%d", workerID)
wg.Go(func() {
timeseriesWorker(qtChild, workChs, uint(workerID))
for i := range workChs {
wg.Add(1)
qtChild := qt.NewChild("worker #%d", i)
go func(workerID uint) {
timeseriesWorker(qtChild, workChs, workerID)
qtChild.Done()
})
wg.Done()
}(uint(i))
}
wg.Wait()
@@ -492,7 +491,10 @@ func (pts *packedTimeseries) unpackTo(dst []*sortBlock, tbf *tmpBlocksFile, tr s
}
// Prepare worker channels.
workers := max(min(len(upws), gomaxprocs), 1)
workers := min(len(upws), gomaxprocs)
if workers < 1 {
workers = 1
}
itemsPerWorker := (len(upws) + workers - 1) / workers
workChs := make([]chan *unpackWork, workers)
for i := range workChs {
@@ -511,10 +513,12 @@ func (pts *packedTimeseries) unpackTo(dst []*sortBlock, tbf *tmpBlocksFile, tr s
// Start workers and wait until they finish the work.
var wg sync.WaitGroup
for workerID := range workers {
wg.Go(func() {
unpackWorker(workChs, uint(workerID))
})
for i := 0; i < workers; i++ {
wg.Add(1)
go func(workerID uint) {
unpackWorker(workChs, workerID)
wg.Done()
}(uint(i))
}
wg.Wait()
@@ -577,7 +581,6 @@ func mergeSortBlocks(dst *Result, sbh *sortBlocksHeap, dedupInterval int64) {
return
}
heap.Init(sbh)
var dedupSamples int
for {
sbs := sbh.sbs
top := sbs[0]
@@ -593,7 +596,6 @@ func mergeSortBlocks(dst *Result, sbh *sortBlocksHeap, dedupInterval int64) {
if n := equalSamplesPrefix(top, sbNext); n > 0 && dedupInterval > 0 {
// Skip n replicated samples at top if deduplication is enabled.
top.NextIdx = topNextIdx + n
dedupSamples += n
} else {
// Copy samples from top to dst with timestamps not exceeding tsNext.
top.NextIdx = topNextIdx + binarySearchTimestamps(top.Timestamps[topNextIdx:], tsNext)
@@ -608,8 +610,8 @@ func mergeSortBlocks(dst *Result, sbh *sortBlocksHeap, dedupInterval int64) {
}
}
timestamps, values := storage.DeduplicateSamples(dst.Timestamps, dst.Values, dedupInterval)
dedupSamples += len(dst.Timestamps) - len(timestamps)
dedupsDuringSelect.Add(dedupSamples)
dedups := len(dst.Timestamps) - len(timestamps)
dedupsDuringSelect.Add(dedups)
dst.Timestamps = timestamps
dst.Values = values
}
@@ -635,7 +637,7 @@ func equalTimestampsPrefix(a, b []int64) int {
func equalValuesPrefix(a, b []float64) int {
for i, v := range a {
if i >= len(b) || math.Float64bits(v) != math.Float64bits(b[i]) {
if i >= len(b) || v != b[i] {
return i
}
}
@@ -830,7 +832,12 @@ func GraphiteTags(qt *querytracer.Tracer, filter string, limit int, deadline sea
}
func hasString(a []string, s string) bool {
return slices.Contains(a, s)
for _, x := range a {
if x == s {
return true
}
}
return false
}
// LabelValues returns label values matching the given labelName and sq until the given deadline.
@@ -858,23 +865,6 @@ func LabelValues(qt *querytracer.Tracer, labelName string, sq *storage.SearchQue
return labelValues, nil
}
// GetMetricsMetadata returns time series metric names metadata for the given args
func GetMetricsMetadata(qt *querytracer.Tracer, limit int, metricName string) ([]*metricsmetadata.Row, error) {
qt = qt.NewChild("get metrics metadata: limit=%d, metric_name=%q", limit, metricName)
defer qt.Done()
metadata := vmstorage.Storage.GetMetadataRows(qt, limit, metricName)
sort.Slice(metadata, func(i, j int) bool {
return string(metadata[i].MetricFamilyName) < string(metadata[j].MetricFamilyName)
})
if limit > 0 && len(metadata) >= limit {
metadata = metadata[:limit]
}
return metadata, nil
}
// GraphiteTagValues returns tag values for the given tagName until the given deadline.
func GraphiteTagValues(qt *querytracer.Tracer, tagName, filter string, limit int, deadline searchutil.Deadline) ([]string, error) {
qt = qt.NewChild("get graphite tag values for tagName=%s, filter=%s, limit=%d", tagName, filter, limit)
@@ -1012,10 +1002,12 @@ func ExportBlocks(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline sear
mustStop atomic.Bool
)
var wg sync.WaitGroup
for workerID := range gomaxprocs {
wg.Go(func() {
wg.Add(gomaxprocs)
for i := 0; i < gomaxprocs; i++ {
go func(workerID uint) {
defer wg.Done()
for xw := range workCh {
if err := f(&xw.mn, &xw.b, tr, uint(workerID)); err != nil {
if err := f(&xw.mn, &xw.b, tr, workerID); err != nil {
errGlobalLock.Lock()
if errGlobal == nil {
errGlobal = err
@@ -1026,7 +1018,7 @@ func ExportBlocks(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline sear
xw.reset()
exportWorkPool.Put(xw)
}
})
}(uint(i))
}
// Feed workers with work

Some files were not shown because too many files have changed in this diff Show More