Compare commits

..

2 Commits

Author SHA1 Message Date
f41gh7
2e4afd249e make linter happy
Signed-off-by: f41gh7 <nik@victoriametrics.com>
2026-06-16 10:25:35 +02:00
f41gh7
bac8eca299 lib/httputil: add load-balancing http transport
This commit adds http client round-robin load-balancing with DNS and
 SRV discovery. It allows http client to route HTTP requests evenly for each discovered IP
address for DNS record.

 Discovered IP addresses are cached locally for 5 seconds.

 This feature allows remove intermediate vmauth as a load-balancer between
 vmagent and remote storages. Which simplifies components management and
 reduces operational overhead.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2388
2026-06-15 08:59:21 +02:00
10 changed files with 510 additions and 64 deletions

View File

@@ -151,17 +151,23 @@ func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persiste
}
tr.Proxy = http.ProxyURL(pu)
}
hc := &http.Client{
Transport: authCfg.NewRoundTripper(tr),
Timeout: sendTimeout.GetOptionalArg(argIdx),
}
rwURL, err := url.Parse(remoteWriteURL)
if err != nil {
logger.Fatalf("BUG: cannot parse already parsed -remoteWrite.url=%q: %s", remoteWriteURL, err)
}
hc.Transport, rwURL = httputil.NewLoadBalancerTransport(hc.Transport, rwURL)
retryMaxIntervalFlag := retryMaxTime
if retryMaxInterval.String() != "" {
retryMaxIntervalFlag = retryMaxInterval
}
c := &client{
sanitizedURL: sanitizedURL,
remoteWriteURL: remoteWriteURL,
remoteWriteURL: rwURL.String(),
authCfg: authCfg,
awsCfg: awsCfg,
fq: fq,

View File

@@ -11,6 +11,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
)
@@ -94,6 +95,12 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
tr.MaxIdleConns = tr.MaxIdleConnsPerHost
}
tr.IdleConnTimeout = *idleConnectionTimeout
hc := &http.Client{Transport: tr}
datasourceURL, err := url.Parse(*addr)
if err != nil {
logger.Fatalf("BUG: cannot parse already parsed -datasource.url=%q: %s", *addr, err)
}
hc.Transport, datasourceURL = httputil.NewLoadBalancerTransport(tr, datasourceURL)
if extraParams == nil {
extraParams = url.Values{}
@@ -120,9 +127,9 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
}
return &Client{
c: &http.Client{Transport: tr},
c: hc,
authCfg: authCfg,
datasourceURL: strings.TrimSuffix(*addr, "/"),
datasourceURL: strings.TrimSuffix(datasourceURL.String(), "/"),
appendTypePrefix: *appendTypePrefix,
queryStep: *queryStep,
extraParams: extraParams,

View File

@@ -4,12 +4,14 @@ import (
"flag"
"fmt"
"net/http"
"net/url"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
)
@@ -76,7 +78,13 @@ func Init() (datasource.QuerierBuilder, error) {
return nil, fmt.Errorf("failed to create transport for -remoteRead.url=%q: %w", *addr, err)
}
tr.IdleConnTimeout = *idleConnectionTimeout
c := &http.Client{Transport: tr}
rrURL, err := url.Parse(*addr)
if err != nil {
logger.Fatalf("BUG: cannot parse already parsed -remoteRead.url=%q: %s", *addr, err)
}
c.Transport, rrURL = httputil.NewLoadBalancerTransport(tr, rrURL)
endpointParams, err := flagutil.ParseJSONMap(*oauth2EndpointParams)
if err != nil {
return nil, fmt.Errorf("cannot parse JSON for -remoteRead.oauth2.endpointParams=%s: %w", *oauth2EndpointParams, err)
@@ -89,6 +97,5 @@ func Init() (datasource.QuerierBuilder, error) {
if err != nil {
return nil, fmt.Errorf("failed to configure auth: %w", err)
}
c := &http.Client{Transport: tr}
return datasource.NewPrometheusClient(*addr, authCfg, false, c), nil
return datasource.NewPrometheusClient(rrURL.String(), authCfg, false, c), nil
}

View File

@@ -8,6 +8,7 @@ import (
"fmt"
"io"
"net/http"
"net/url"
"path"
"strings"
"sync"
@@ -111,12 +112,18 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
if cfg.Concurrency > 0 {
cc = cfg.Concurrency
}
hc := &http.Client{
Timeout: *sendTimeout,
Transport: cfg.Transport,
}
rwURL, err := url.Parse(cfg.Addr)
if err != nil {
logger.Fatalf("cannot parse already parsed -remoteWrite.url=%q: %s", cfg.Addr, err)
}
hc.Transport, rwURL = httputil.NewLoadBalancerTransport(hc.Transport, rwURL)
c := &Client{
c: &http.Client{
Timeout: *sendTimeout,
Transport: cfg.Transport,
},
addr: strings.TrimSuffix(cfg.Addr, "/"),
c: hc,
addr: strings.TrimSuffix(rwURL.String(), "/"),
authCfg: cfg.AuthCfg,
flushInterval: cfg.FlushInterval,
maxBatchSize: cfg.MaxBatchSize,

View File

@@ -1712,63 +1712,64 @@ The following versions of VictoriaMetrics receive regular security fixes:
| [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | ✅ |
| other releases | ❌ |
### Software Bill of Materials (SBOM)
Every VictoriaMetrics container{{% available_from "v1.137.0" %}} image published to
[Docker Hub](https://hub.docker.com/u/victoriametrics) and [Quay.io](https://quay.io/organization/victoriametrics) include an [SPDX](https://spdx.dev/) SBOM attestation generated automatically by BuildKit during `docker buildx build`.
To inspect the SBOM for an image:
```sh
docker buildx imagetools inspect \
docker.io/victoriametrics/victoria-metrics:latest \
--format "{{ json .SBOM }}"
```
To scan an image using its SBOM attestation with [Trivy](https://github.com/aquasecurity/trivy):
```sh
trivy image --sbom-sources oci \
docker.io/victoriametrics/victoria-metrics:latest
```
### Reporting a Vulnerability
Please report any security issues to <security@victoriametrics.com>
### CVE handling policy
**Source code:** Go dependencies are scanned by [govulncheck](https://pkg.go.dev/golang.org/x/vuln/cmd/govulncheck) in CI.
All vulnerabilities must be fixed before the next scheduled release and backported to [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/).
**Docker images:** CVE findings in the [Alpine](https://security.alpinelinux.org/) base image pose minimal risk since VictoriaMetrics binaries are statically compiled with no OS dependencies.
When detected, only the Alpine base tag is updated.
Releases proceed as planned even if upstream fixes are not yet available.
For maximum security, hardened [scratch](https://hub.docker.com/_/scratch)-based images are also provided.
All images are continuously scanned by Docker Hub and verified before release using [grype](https://github.com/anchore/grype).
### General security recommendations:
* All VictoriaMetrics components must run in protected private networks without direct access from untrusted networks such as the Internet.
* All the VictoriaMetrics components must run in protected private networks without direct access from untrusted networks such as Internet.
The exception is [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/) and [vmgateway](https://docs.victoriametrics.com/victoriametrics/vmgateway/),
which are intended for serving public requests and performing authorization with [TLS termination](https://en.wikipedia.org/wiki/TLS_termination_proxy).
* All the requests from untrusted networks to VictoriaMetrics components must go through an auth proxy, such as [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/)
* All the requests from untrusted networks to VictoriaMetrics components must go through auth proxy such as [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/)
or [vmgateway](https://docs.victoriametrics.com/victoriametrics/vmgateway/). The proxy must be set up with proper authentication and authorization.
* Prefer using lists of allowed API endpoints, while disallowing access to other endpoints when configuring [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/)
in front of VictoriaMetrics components.
* Set a reasonable [`Strict-Transport-Security`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Strict-Transport-Security) header value on all the components to mitigate [MitM attacks](https://en.wikipedia.org/wiki/Man-in-the-middle_attack), for example: `max-age=31536000; includeSubDomains`. See `-http.header.hsts` flag.
* Set reasonable [`Strict-Transport-Security`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Strict-Transport-Security) header value to all the components to mitigate [MitM attacks](https://en.wikipedia.org/wiki/Man-in-the-middle_attack), for example: `max-age=31536000; includeSubDomains`. See `-http.header.hsts` flag.
* Set reasonable [`Content-Security-Policy`](https://developer.mozilla.org/en-US/docs/Web/HTTP/CSP) header value to mitigate [XSS attacks](https://en.wikipedia.org/wiki/Cross-site_scripting). See `-http.header.csp` flag.
* Set reasonable [`X-Frame-Options`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Frame-Options) header value to mitigate [clickjacking attacks](https://en.wikipedia.org/wiki/Clickjacking), for example `DENY`. See `-http.header.frameOptions` flag.
The following security-related command-line flags are available for all components with HTTP API:
VictoriaMetrics provides the following security-related command-line flags:
* `-tls`, `-tlsCertFile` and `-tlsKeyFile` for switching from HTTP to HTTPS at `-httpListenAddr`.
* `-tls`, `-tlsCertFile` and `-tlsKeyFile` for switching from HTTP to HTTPS at `-httpListenAddr` (TCP port 8428 is listened by default).
[Enterprise version of VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/enterprise/) supports automatic issuing of TLS certificates.
See [these docs](#automatic-issuing-of-tls-certificates).
* `-mtls` and `-mtlsCAFile` for enabling [mTLS](https://en.wikipedia.org/wiki/Mutual_authentication) for requests to `-httpListenAddr`. See [these docs](#mtls-protection).
* `-httpAuth.username` and `-httpAuth.password` for protecting all the HTTP endpoints
with [HTTP Basic Authentication](https://en.wikipedia.org/wiki/Basic_access_authentication).
* `-http.header.hsts`, `-http.header.csp`, and `-http.header.frameOptions` for serving `Strict-Transport-Security`, `Content-Security-Policy`
and `X-Frame-Options` HTTP response headers.
### Protecting service endpoints
All VictoriaMetrics components expose internal metrics in Prometheus exposition format at the `/metrics` page for [#Monitoring](https://docs.victoriametrics.com/victoriametrics/#monitoring).
Consider limiting access to the `/metrics` page to trusted networks only.
The following service endpoints may require protection:
* `-deleteAuthKey` for protecting the `/api/v1/admin/tsdb/delete_series` endpoint. See [how to delete time series](#how-to-delete-time-series).
* `-snapshotAuthKey` for protecting the `/snapshot*` endpoints. See [how to work with snapshots](#how-to-work-with-snapshots).
* `-forceFlushAuthKey` for protecting the `/internal/force_flush` endpoint. See [force flush docs](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#forced-flush).
* `-forceMergeAuthKey` for protecting the `/internal/force_merge` endpoint. See [force merge docs](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#forced-merge).
* `-search.resetCacheAuthKey` for protecting the `/internal/resetRollupResultCache` endpoint. See [backfilling](#backfilling) for more details.
* `-reloadAuthKey` for protecting the `/-/reload` endpoint, which is used to force reload the [`-promscrape.config`](#how-to-scrape-prometheus-exporters-such-as-node-exporter).
* `-reloadAuthKey` for protecting the `/-/reload` endpoint, which is used for force reloading of [`-promscrape.config`](#how-to-scrape-prometheus-exporters-such-as-node-exporter).
* `-configAuthKey` for protecting the `/config` endpoint, since it may contain sensitive information such as passwords.
* `-flagsAuthKey` for protecting the `/flags` endpoint.
* `-pprofAuthKey` for protecting the `/debug/pprof/*` endpoints, which can be used for [profiling](#profiling).
* `-metricNamesStatsResetAuthKey` for protecting the `/api/v1/admin/status/metric_names_stats/reset` endpoint, used for [Metric Names Tracker](#track-ingested-metrics-usage).
* `-denyQueryTracing` for disallowing [query tracing](#query-tracing).
* `-http.header.hsts`, `-http.header.csp`, and `-http.header.frameOptions` for serving `Strict-Transport-Security`, `Content-Security-Policy`
and `X-Frame-Options` HTTP response headers.
Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats.
For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<internal_iface_ip>:2003`. This protects from unexpected requests from untrusted network interfaces.
@@ -1776,6 +1777,17 @@ For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<i
See also [security recommendation for VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#security)
and [the general security page at VictoriaMetrics website](https://victoriametrics.com/security/).
### CVE handling policy
**Source code:** Go dependencies are scanned by [govulncheck](https://pkg.go.dev/golang.org/x/vuln/cmd/govulncheck) in CI.
All vulnerabilities must be fixed before next scheduled release and backported to [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/).
**Docker images:** CVE findings in [Alpine](https://security.alpinelinux.org/) base image pose minimal risk since VictoriaMetrics binaries are statically compiled with no OS dependencies.
When detected, only the Alpine base tag is updated.
Releases proceed as planned even if upstream fixes are not yet available.
For maximum security, hardened [scratch](https://hub.docker.com/_/scratch)-based images are also provided.
All images are continuously scanned by Docker Hub and verified before release using [grype](https://github.com/anchore/grype).
### mTLS protection
By default `VictoriaMetrics` accepts http requests at `8428` port (this port can be changed via `-httpListenAddr` command-line flags).
@@ -1805,39 +1817,19 @@ This functionality can be evaluated for free according to [these docs](https://d
See also [security recommendations](#security).
### Software Bill of Materials (SBOM)
Every VictoriaMetrics container{{% available_from "v1.137.0" %}} image published to
[Docker Hub](https://hub.docker.com/u/victoriametrics) and [Quay.io](https://quay.io/organization/victoriametrics) include an [SPDX](https://spdx.dev/) SBOM attestation generated automatically by BuildKit during `docker buildx build`.
To inspect the SBOM for an image:
```sh
docker buildx imagetools inspect \
docker.io/victoriametrics/victoria-metrics:latest \
--format "{{ json .SBOM }}"
```
To scan an image using its SBOM attestation with [Trivy](https://github.com/aquasecurity/trivy):
```sh
trivy image --sbom-sources oci \
docker.io/victoriametrics/victoria-metrics:latest
```
## Tuning
* No need to tune for VictoriaMetrics - it uses reasonable defaults for command-line flags,
* No need in tuning for VictoriaMetrics - it uses reasonable defaults for command-line flags,
which are automatically adjusted for the available CPU and RAM resources.
* No need to tune for Operating System - VictoriaMetrics is optimized for default OS settings.
* No need in tuning for Operating System - VictoriaMetrics is optimized for default OS settings.
The only option is increasing the limit on [the number of open files in the OS](https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a).
The recommendation is not specific to VictoriaMetrics only, but also for any service that handles many HTTP connections and stores data on disk.
* VictoriaMetrics is a write-heavy application, and its performance depends on disk performance. So be careful with other
The recommendation is not specific for VictoriaMetrics only but also for any service which handles many HTTP connections and stores data on disk.
* VictoriaMetrics is a write-heavy application and its performance depends on disk performance. So be careful with other
applications or utilities (like [fstrim](https://manpages.ubuntu.com/manpages/lunar/en/man8/fstrim.8.html))
which could [exhaust disk resources](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1521).
* The recommended filesystem is `ext4`, the recommended persistent storage is [persistent HDD-based disk on GCP](https://cloud.google.com/compute/docs/disks/#pdspecs),
since it is protected from hardware failures via internal replication, and it can be [resized on the fly](https://cloud.google.com/compute/docs/disks/add-persistent-disk#resize_pd).
If you plan to store more than 1TB of data on an `ext4` partition, then the following options are recommended to pass to `mkfs.ext4`:
since it is protected from hardware failures via internal replication and it can be [resized on the fly](https://cloud.google.com/compute/docs/disks/add-persistent-disk#resize_pd).
If you plan to store more than 1TB of data on `ext4` partition, then the following options are recommended to pass to `mkfs.ext4`:
```sh
mkfs.ext4 ... -O 64bit,huge_file,extent -T huge

View File

@@ -27,6 +27,7 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
## tip
* BUGFIX: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): fix issue with producing aggregated samples with identical timestamps between flushes. See PR [#10808](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10808) for details.
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): add client side round-robin load-balancing with `DNS` discovery. See [#2388](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2388) and these [vmagent DNS URLs](https://docs.victoriametrics.com/victoriametrics/vmagent/#dns-urls), [vmalert DNS URLs](https://docs.victoriametrics.com/victoriametrics/vmalert/#dns-urls).
## [v1.145.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.145.0)

View File

@@ -431,6 +431,43 @@ and `-remoteWrite.streamAggr.config`:
There is also the `-promscrape.configCheckInterval` command-line flag, which can be used to automatically reload configs from the updated `-promscrape.config` file.
## DNS URLs
If `vmagent` encounters URLs with the `dns+` prefix in the hostname (such as `http://dns+some-addr:8428/some/path`), it resolves `some-addr` into IP addresses
via [DNS A records](https://datatracker.ietf.org/doc/html/rfc1035#section-3.4.1). The port from the original URL is appended to each discovered IP address.
Each discovered IP address is used for round-robin balancing of write requests.
DNS URLs are supported in the following places:
* In `-remoteWrite.url` command-line flag. For example, if the `victoria-metrics` [DNS A record](https://datatracker.ietf.org/doc/html/rfc1035#section-3.4.1) contains
`192.168.1.15` IP address, then `-remoteWrite.url=http://dns+victoria-metrics:8428/api/v1/write` is automatically resolved into
`-remoteWrite.url=http://192.168.1.15:8428/api/v1/write`.
DNS URLs are useful when client-side HTTP load balancing is needed. A good example
is a [Kubernetes headless Service](https://kubernetes.io/docs/concepts/services-networking/service/#headless-services),
which returns multiple IP addresses for a single hostname.
### DNS URLs and HTTPS
When a `dns+` URL uses the `https` scheme, `vmagent` connects to the discovered
IP addresses directly. This affects [TLS](https://en.wikipedia.org/wiki/Transport_Layer_Security)
in two ways:
* No [SNI](https://en.wikipedia.org/wiki/Server_Name_Indication) is sent in the TLS handshake,
since the connection target is an IP address rather than a hostname.
* The server certificate is verified against the IP address, so the verification fails
unless the certificate contains the corresponding
[IP SAN](https://en.wikipedia.org/wiki/Subject_Alternative_Name) entries.
To use `dns+` URLs with HTTPS, pass the original hostname via the `-remoteWrite.tlsServerName`
command-line flag. It is used both as SNI and as the name the server certificate
is verified against:
```sh
-remoteWrite.url=https://dns+victoria-metrics:8428/api/v1/write
-remoteWrite.tlsServerName=victoria-metrics
```
## SRV URLs
If `vmagent` encounters URLs with `srv+` prefix in hostname (such as `http://srv+some-addr/some/path`), then it resolves `some-addr` [DNS SRV](https://en.wikipedia.org/wiki/SRV_record)
@@ -441,7 +478,7 @@ SRV URLs are supported in the following places:
* In `-remoteWrite.url` command-line flag. For example, if `victoria-metrics` [DNS SRV](https://en.wikipedia.org/wiki/SRV_record) record contains
`victoria-metrics-host:8428` TCP address, then `-remoteWrite.url=http://srv+victoria-metrics/api/v1/write` is automatically resolved into
`-remoteWrite.url=http://victoria-metrics-host:8428/api/v1/write`. If the DNS SRV record is resolved into multiple TCP addresses, then `vmagent`
uses a randomly chosen address for each connection it establishes to the remote storage.
performs per-request round-robin load balancing.
* In scrape target addresses aka `__address__` label. See [these docs](https://docs.victoriametrics.com/victoriametrics/relabeling/#how-to-modify-scrape-urls-in-targets) for details.

View File

@@ -1470,6 +1470,59 @@ alert_relabel_configs:
The configuration file can be [hot-reloaded](#hot-config-reload).
## DNS URLs
If `vmalert` encounters URLs with the `dns+` prefix in the hostname (such as `http://dns+some-addr:8428/some/path`), it resolves `some-addr` into IP addresses
via [DNS A records](https://datatracker.ietf.org/doc/html/rfc1035#section-3.4.1). The port from the original URL is appended to each discovered IP address.
Each discovered IP address is used for round-robin balancing of requests.
DNS URLs are supported in the following places:
* In `-remoteWrite.url`, `-remoteRead.url` and `-datasource.url` command-line flags. For example, if the `victoria-metrics` [DNS A record](https://datatracker.ietf.org/doc/html/rfc1035#section-3.4.1) contains
`192.168.1.15` IP address, then `-remoteWrite.url=http://dns+victoria-metrics:8428` is automatically resolved into
`-remoteWrite.url=http://192.168.1.15:8428`.
DNS URLs are useful when client-side HTTP load balancing is needed. A good example
is a [Kubernetes headless Service](https://kubernetes.io/docs/concepts/services-networking/service/#headless-services),
which returns multiple IP addresses for a single hostname.
### DNS URLs and HTTPS
When a `dns+` URL uses the `https` scheme, `vmalert` connects to the discovered
IP addresses directly. No [SNI](https://en.wikipedia.org/wiki/Server_Name_Indication)
is sent in the TLS handshake, and the server certificate is verified against the IP address,
which fails unless the certificate contains the corresponding
[IP SAN](https://en.wikipedia.org/wiki/Subject_Alternative_Name) entries.
To use `dns+` URLs with HTTPS, pass the original hostname via the corresponding
`tlsServerName` command-line flag - `-datasource.tlsServerName`, `-remoteRead.tlsServerName`
or `-remoteWrite.tlsServerName`. It is used both as SNI and as the name the server
certificate is verified against:
```sh
-datasource.url=https://dns+victoria-metrics:8428
-datasource.tlsServerName=victoria-metrics
```
Alternatively, issue server certificates with IP SAN entries for every backend IP address.
Avoid `tlsInsecureSkipVerify` flags for working around this, since they disable
server certificate verification completely.
## SRV URLs
If `vmalert` encounters URLs with `srv+` prefix in hostname (such as `http://srv+some-addr/some/path`), then it resolves `some-addr` [DNS SRV](https://en.wikipedia.org/wiki/SRV_record)
record into a TCP address with hostname and TCP port, and then uses the resulting URL when it needs to connect to it.
SRV URLs are supported in the following places:
* In `-remoteWrite.url`, `-remoteRead.url` and `-datasource.url` command-line flags. For example, if `victoria-metrics` [DNS SRV](https://en.wikipedia.org/wiki/SRV_record) record contains
`victoria-metrics-host:8085`, then `-remoteWrite.url=http://srv+victoria-metrics:8428` is automatically resolved into
`-remoteWrite.url=http://victoria-metrics-host:8085`. If the DNS SRV record is resolved into multiple TCP addresses, then `vmalert`
performs per-request round-robin load balancing.
SRV URLs are useful when HTTP services run on different TCP ports or when their TCP ports can change over time (for instance, after a restart).
## Contributing
`vmalert` is mostly designed and built by VictoriaMetrics community.

View File

@@ -0,0 +1,203 @@
package httputil
import (
"context"
"errors"
"fmt"
"math/rand"
"net"
"net/http"
"net/netip"
"net/url"
"strconv"
"strings"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
)
// NewLoadBalancerTransport returns a RoundTripper that performs round-robin load balancing
// of HTTP requests across the backends discovered for the host of originURL.
//
// Load balancing is enabled when originURL.Host starts with one of the following prefixes:
//
//   - "dns+" - backends are discovered via discoverDNSBackends
//   - "srv+" - backends are discovered via discoverSRVBackends
//
// In this case the returned URL is a copy of originURL with the prefix removed from its Host.
// originURL itself is left unmodified.
//
// If the host has no such prefix, origin and originURL are returned as is.
func NewLoadBalancerTransport(origin http.RoundTripper, originURL *url.URL) (http.RoundTripper, *url.URL) {
	const (
		dnsPrefix = "dns+"
		srvPrefix = "srv+"
	)

	// Work on a copy, so the caller's URL is never modified.
	modifiedURL := *originURL
	var discoverFunc func(context.Context, string, string) ([]string, error)
	switch {
	case strings.HasPrefix(originURL.Host, dnsPrefix):
		modifiedURL.Host = strings.TrimPrefix(modifiedURL.Host, dnsPrefix)
		discoverFunc = discoverDNSBackends
	case strings.HasPrefix(originURL.Host, srvPrefix):
		modifiedURL.Host = strings.TrimPrefix(modifiedURL.Host, srvPrefix)
		discoverFunc = discoverSRVBackends
	default:
		return origin, originURL
	}

	host, port, err := net.SplitHostPort(modifiedURL.Host)
	if err != nil {
		// The URL has no explicit port. Use the prefix-stripped host for discovery;
		// originURL.Host still contains the "dns+" / "srv+" prefix and cannot be resolved.
		host = modifiedURL.Host
		port = "80"
		if modifiedURL.Scheme == "https" {
			port = "443"
		}
	}

	t := &loadbalancerTransport{
		tr:           origin,
		host:         host,
		port:         port,
		discoverFunc: discoverFunc,
	}
	// t is not shared with other goroutines yet, so t.mu does not need to be held here.
	t.discoverBackendsLocked(context.Background())
	return t, &modifiedURL
}
// loadbalancerTransport is an http.RoundTripper that spreads requests
// across the backends returned by discoverFunc in round-robin order.
type loadbalancerTransport struct {
// tr is the underlying transport used for sending requests to the picked backend
tr http.RoundTripper
// host and port are passed to discoverFunc on every discovery;
// host is also used in error messages
host string
port string
// discoverFunc returns the list of backend addresses for the given host and port
discoverFunc func(context.Context, string, string) ([]string, error)
// mu protects fields below
mu sync.Mutex
// lastDiscoveredAt is set when a discovery attempt starts, whether it succeeds or not
lastDiscoveredAt time.Time
// dbs holds the result of the last successful discovery
dbs *discoveredBackends
}
// discoveredBackends is the result of a single successful backends discovery.
type discoveredBackends struct {
// backends is the list of discovered backend addresses
backends []string
// idx is incremented on every pick; the backend is selected as backends[idx % len(backends)]
idx uint64
}
// RoundTrip implements http.RoundTripper interface.
//
// The request is sent to the next backend in round-robin order via a clone of r,
// so r itself is left unmodified. If the clone has an empty Host, it is set to r.URL.Host.
//
// If the first attempt fails, backends are re-discovered and a single retry is made
// against a newly picked backend, provided the request body can be replayed.
// The retry is skipped when the error is a *net.DNSError with IsNotFound unset
// which is not reported as trivial by netutil.IsTrivialNetworkError.
func (lb *loadbalancerTransport) RoundTrip(r *http.Request) (*http.Response, error) {
	ctx := r.Context()

	addr := lb.pickBackend(ctx, false)
	if addr == "" {
		return nil, fmt.Errorf("no backends found for hostname=%q", lb.host)
	}
	attempt := r.Clone(ctx)
	attempt.URL.Host = addr
	if attempt.Host == "" {
		attempt.Host = r.URL.Host
	}
	resp, err := lb.tr.RoundTrip(attempt)
	if err == nil {
		return resp, nil
	}

	// NOTE(review): this condition skips the retry only for non-"not found" DNS errors,
	// so every other non-trivial error (non-DNS included) is retried as well.
	// Confirm this is intended - it is wider than "trivial error or dns lookup error".
	var dnsErr *net.DNSError
	if !netutil.IsTrivialNetworkError(err) && (errors.As(err, &dnsErr) && !dnsErr.IsNotFound) {
		return nil, err
	}

	// Force re-discovery before the retry, since the current list of backends may be stale.
	addr = lb.pickBackend(ctx, true)
	if addr == "" {
		return nil, fmt.Errorf("no backends found for hostname=%q", lb.host)
	}

	// The same replayability rule as in http.Request.isReplayable:
	// the body must be either absent or re-creatable via GetBody.
	replayable := r.Body == nil || r.Body == http.NoBody || r.GetBody != nil
	if !replayable {
		return nil, err
	}

	attempt = r.Clone(ctx)
	if r.GetBody != nil {
		freshBody, bodyErr := r.GetBody()
		if bodyErr != nil {
			// Report the error from the first attempt.
			return nil, err
		}
		attempt.Body = freshBody
	}
	if attempt.Host == "" {
		attempt.Host = r.URL.Host
	}
	attempt.URL.Host = addr
	return lb.tr.RoundTrip(attempt)
}
// pickBackend returns the next backend address in round-robin order.
//
// Backends are re-discovered if there are no discovery results yet,
// if the last discovery attempt started more than 5 seconds ago
// or if forceDiscovery is set.
//
// It returns an empty string if there are no backends.
func (lb *loadbalancerTransport) pickBackend(ctx context.Context, forceDiscovery bool) string {
	// The timestamp is taken before acquiring the lock. This allows detecting
	// that another goroutine has started a discovery while this one was waiting for the lock.
	now := time.Now()

	lb.mu.Lock()
	defer lb.mu.Unlock()

	if forceDiscovery && !now.Before(lb.lastDiscoveredAt) {
		// prevent concurrent force discovery
		lb.lastDiscoveredAt = time.Time{}
	}

	needDiscovery := lb.dbs == nil || now.Sub(lb.lastDiscoveredAt) > 5*time.Second
	if needDiscovery {
		lb.discoverBackendsLocked(ctx)
	}

	current := lb.dbs
	if current == nil || len(current.backends) == 0 {
		return ""
	}
	pos := current.idx % uint64(len(current.backends))
	current.idx++
	return current.backends[pos]
}
// discoverBackendsLocked refreshes the list of backends via lb.discoverFunc.
//
// The discovery is limited by a 10 seconds timeout on top of ctx.
// On failure the error is logged and the previously discovered backends are kept.
// lb.lastDiscoveredAt is updated in both cases.
//
// Per the Locked suffix, callers are expected to hold lb.mu.
func (lb *loadbalancerTransport) discoverBackendsLocked(ctx context.Context) {
	lb.lastDiscoveredAt = time.Now()

	lookupCtx, stop := context.WithTimeout(ctx, 10*time.Second)
	defer stop()

	found, err := lb.discoverFunc(lookupCtx, lb.host, lb.port)
	if err != nil {
		logger.Errorf("cannot discover backends: %s", err)
		return
	}

	// Shuffle backends, since the round-robin index starts from zero after every discovery.
	rand.Shuffle(len(found), func(a, b int) {
		found[a], found[b] = found[b], found[a]
	})
	lb.dbs = &discoveredBackends{backends: found}
}
// discoverDNSBackends resolves host into IP addresses via netutil.Resolver
// and returns them as backend addresses.
//
// The port is appended to every IP address if it is non-empty.
// Addresses other than IPv4 (including IPv4-mapped IPv6) are skipped
// when netutil.TCP6Enabled() returns false.
func discoverDNSBackends(ctx context.Context, host, port string) ([]string, error) {
	ipAddrs, err := netutil.Resolver.LookupIPAddr(ctx, host)
	if err != nil {
		return nil, fmt.Errorf("failed to lookupIPAddr for host: %q: %w", host, err)
	}

	result := make([]string, 0, len(ipAddrs))
	for i := range ipAddrs {
		rawIP := ipAddrs[i].IP
		if !netutil.TCP6Enabled() {
			parsed, ok := netip.AddrFromSlice(rawIP)
			if !ok {
				logger.Panicf("BUG: cannot build netip Addr from slice addr: %q", rawIP.String())
			}
			if !parsed.Unmap().Is4() {
				continue
			}
		}
		target := rawIP.String()
		if port != "" {
			target = net.JoinHostPort(target, port)
		}
		result = append(result, target)
	}
	return result, nil
}
// discoverSRVBackends resolves SRV records for host via netutil.Resolver
// and returns a "target:port" backend address per record.
//
// The port from the SRV record is used if it is non-zero; otherwise the given port is used.
func discoverSRVBackends(ctx context.Context, host, port string) ([]string, error) {
	_, records, err := netutil.Resolver.LookupSRV(ctx, "", "", host)
	if err != nil {
		return nil, fmt.Errorf("failed to LookupSRV records for host: %q: %w", host, err)
	}

	result := make([]string, len(records))
	for i, rec := range records {
		targetPort := port
		if rec.Port > 0 {
			targetPort = strconv.FormatUint(uint64(rec.Port), 10)
		}
		result[i] = net.JoinHostPort(rec.Target, targetPort)
	}
	return result, nil
}

View File

@@ -0,0 +1,133 @@
package httputil
import (
"context"
"fmt"
"net"
"net/http"
"net/netip"
"net/url"
"sync"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
)
// testRemoteServer is an in-memory http.RoundTripper, which counts the served requests per URL host.
type testRemoteServer struct {
	mu sync.Mutex
	// requestsPerHost counts successfully served requests by r.URL.Host
	requestsPerHost map[string]int
	// totalRequests counts all the RoundTrip calls, including the failed one
	totalRequests int
	// firstError, if set, is returned from the very first RoundTrip call and is cleared then
	firstError error
}

// RoundTrip implements http.RoundTripper interface.
//
// It returns firstError for the very first call if it is set.
// Every other call is registered in requestsPerHost and gets an empty 200 OK response.
func (trs *testRemoteServer) RoundTrip(r *http.Request) (*http.Response, error) {
	trs.mu.Lock()
	defer trs.mu.Unlock()

	isFirstCall := trs.totalRequests == 0
	trs.totalRequests++

	if isFirstCall && trs.firstError != nil {
		failure := trs.firstError
		trs.firstError = nil
		return nil, failure
	}

	if trs.requestsPerHost == nil {
		trs.requestsPerHost = map[string]int{}
	}
	trs.requestsPerHost[r.URL.Host]++

	return &http.Response{StatusCode: http.StatusOK, Body: http.NoBody}, nil
}
// testDNSResolver is a stub DNS resolver for tests.
//
// Only LookupIPAddr is expected to be called; the remaining lookups return an error.
type testDNSResolver struct {
	// ips is returned by LookupIPAddr for any host
	ips []net.IPAddr
}

// LookupSRV always returns an error, since SRV lookups aren't expected in tests.
func (tdr *testDNSResolver) LookupSRV(_ context.Context, _, _, name string) (cname string, addrs []*net.SRV, err error) {
	return "", nil, fmt.Errorf("unexpected LookupSRV call for name=%q", name)
}

// LookupIPAddr returns the pre-configured tdr.ips for any host.
func (tdr *testDNSResolver) LookupIPAddr(_ context.Context, _ string) ([]net.IPAddr, error) {
	return tdr.ips, nil
}

// LookupMX always returns an error, since MX lookups aren't expected in tests.
func (tdr *testDNSResolver) LookupMX(_ context.Context, name string) ([]*net.MX, error) {
	return nil, fmt.Errorf("unexpected LookupMX call for name=%q", name)
}
// TestLoadbalancerTransport verifies that requests sent via the transport returned
// by NewLoadBalancerTransport are evenly spread across all the IP addresses
// returned by the stubbed DNS resolver, and that a request fails when there are no backends.
func TestLoadbalancerTransport(t *testing.T) {
	const requestsPerBackend = 2

	f := func(discoveredIPs []string, trs *testRemoteServer) {
		t.Helper()

		ipAddrs := make([]net.IPAddr, len(discoveredIPs))
		for i, s := range discoveredIPs {
			addr, err := netip.ParseAddr(s)
			if err != nil {
				t.Fatalf("cannot parse IP=%q: %s", s, err)
			}
			ipAddrs[i] = net.IPAddr{IP: addr.AsSlice()}
		}

		// Substitute the global resolver for the duration of a single check.
		prevResolver := netutil.Resolver
		defer func() { netutil.Resolver = prevResolver }()
		netutil.Resolver = &testDNSResolver{ips: ipAddrs}

		requestURL, err := url.Parse("http://dns+vmsingle.example.com:8429/api/v1/write")
		if err != nil {
			t.Fatalf("cannot parse url: %s", err)
		}
		lbt, requestURL := NewLoadBalancerTransport(trs, requestURL)

		doRequest := func() (*http.Response, error) {
			t.Helper()
			req, err := http.NewRequest(http.MethodGet, requestURL.String(), nil)
			if err != nil {
				t.Fatalf("cannot create http request: %s", err)
			}
			return lbt.RoundTrip(req)
		}

		if len(discoveredIPs) == 0 {
			if _, err := doRequest(); err == nil {
				t.Fatalf("expected no backends found error")
			}
			return
		}

		total := len(discoveredIPs) * requestsPerBackend
		for i := 0; i < total; i++ {
			resp, err := doRequest()
			if err != nil {
				t.Fatalf("unexpected error: %s", err)
			}
			resp.Body.Close()
		}

		// Every backend must receive the same number of requests.
		for _, s := range discoveredIPs {
			hostPort := net.JoinHostPort(s, "8429")
			got, ok := trs.requestsPerHost[hostPort]
			if !ok {
				t.Fatalf("not found expected backend request for: %q", hostPort)
			}
			if got != requestsPerBackend {
				t.Fatalf("unexpected requests per host: %d:%d (-;+)", requestsPerBackend, got)
			}
		}
	}

	// single backend
	f([]string{"1.1.1.1"}, &testRemoteServer{})

	// multiple backends
	f([]string{"1.1.1.1", "2.2.2.2", "5.5.5.5"}, &testRemoteServer{})

	// retry dns resolve error
	f([]string{"1.1.1.1", "2.2.2.2", "5.5.5.5"}, &testRemoteServer{
		firstError: &net.DNSError{Err: "no such host", IsNotFound: true},
	})

	// empty backends, expecting error
	f([]string{}, &testRemoteServer{})
}