Compare commits

...

2 Commits

Author SHA1 Message Date
Zakhar Bessarab
c157576c2f app/vmalert/notifier: fix test after b1e998f7
Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>
2025-02-06 15:02:47 +04:00
Zakhar Bessarab
b1e998f7ad app/vmalert/utils: unregister metrics only if there is no refs left
Currently, when performing rules reload vmalert treats interval change as a new group. This leads to previous group being closed.
When group is closed it also unregisters metrics related to the group.

The problem is that newly created group will still use metrics with the same names as name only includes "file" and "group name" as labels and these are the same.

This commit introduces a "reference tracking" for metric names and prevents unregistering metrics if metric name is still in use.

See: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8229
Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>
2025-02-06 14:49:26 +04:00
4 changed files with 106 additions and 5 deletions

View File

@@ -2,6 +2,7 @@ package notifier
import (
"context"
"fmt"
"testing"
"time"
@@ -27,10 +28,12 @@ func TestBlackHoleNotifier_Send(t *testing.T) {
}
func TestBlackHoleNotifier_Close(t *testing.T) {
addr := "blackhole-close"
bh := newBlackHoleNotifier()
bh.addr = addr
if err := bh.Send(context.Background(), []Alert{{
GroupID: 0,
Name: "alert0",
Name: "alert1",
Start: time.Now().UTC(),
End: time.Now().UTC(),
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
@@ -41,10 +44,10 @@ func TestBlackHoleNotifier_Close(t *testing.T) {
bh.Close()
defaultMetrics := metricset.GetDefaultSet()
alertMetricName := "vmalert_alerts_sent_total{addr=\"blackhole\"}"
alertMetricName := fmt.Sprintf("vmalert_alerts_sent_total{addr=%q}", addr)
for _, name := range defaultMetrics.ListMetricNames() {
if name == alertMetricName {
t.Fatalf("Metric name should have unregistered.But still present")
t.Fatalf("Metric name should have unregistered. But still present")
}
}
}

View File

@@ -1,14 +1,56 @@
package utils
import "github.com/VictoriaMetrics/metrics"
import (
"sync"
"sync/atomic"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
type namedMetric struct {
Name string
}
var usedMetrics map[string]*atomic.Int64
var usedMetricMu sync.Mutex
func trackUsedMetric(name string) {
usedMetricMu.Lock()
defer usedMetricMu.Unlock()
if usedMetrics == nil {
usedMetrics = make(map[string]*atomic.Int64)
}
if _, ok := usedMetrics[name]; !ok {
usedMetrics[name] = &atomic.Int64{}
}
usedMetrics[name].Add(1)
}
// Unregister removes the metric by name from default registry
func (nm namedMetric) Unregister() {
metrics.UnregisterMetric(nm.Name)
if usedMetrics == nil {
logger.Fatalf("BUG: unregistered metric %q before registering", nm.Name)
}
usedMetricMu.Lock()
counter, ok := usedMetrics[nm.Name]
if !ok {
logger.Fatalf("BUG: unregistered metric %q before registering", nm.Name)
}
current := counter.Add(-1)
usedMetricMu.Unlock()
if current < 0 {
logger.Fatalf("BUG: negative metric counter for %q", nm.Name)
}
if current == 0 {
metrics.UnregisterMetric(nm.Name)
}
}
// Gauge is a metrics.Gauge with Name
@@ -19,6 +61,7 @@ type Gauge struct {
// GetOrCreateGauge creates a new Gauge with the given name
func GetOrCreateGauge(name string, f func() float64) *Gauge {
trackUsedMetric(name)
return &Gauge{
namedMetric: namedMetric{Name: name},
Gauge: metrics.GetOrCreateGauge(name, f),
@@ -33,6 +76,7 @@ type Counter struct {
// GetOrCreateCounter creates a new Counter with the given name
func GetOrCreateCounter(name string) *Counter {
trackUsedMetric(name)
return &Counter{
namedMetric: namedMetric{Name: name},
Counter: metrics.GetOrCreateCounter(name),
@@ -47,6 +91,7 @@ type Summary struct {
// GetOrCreateSummary creates a new Summary with the given name
func GetOrCreateSummary(name string) *Summary {
trackUsedMetric(name)
return &Summary{
namedMetric: namedMetric{Name: name},
Summary: metrics.GetOrCreateSummary(name),

View File

@@ -0,0 +1,52 @@
package utils
import (
"testing"
"github.com/VictoriaMetrics/metrics"
)
func isMetricRegistered(name string) bool {
metricNames := metrics.GetDefaultSet().ListMetricNames()
for _, mn := range metricNames {
if mn == name {
return true
}
}
return false
}
func TestMetricIsUnregistered(t *testing.T) {
metricName := "example_runs_total"
c := GetOrCreateCounter(metricName)
if !isMetricRegistered(metricName) {
t.Errorf("Expected metric %s to be present", metricName)
}
c.Unregister()
if isMetricRegistered(metricName) {
t.Errorf("Expected metric %s to be unregistered", metricName)
}
}
func TestMetricIsRemovedIfNoUses(t *testing.T) {
metricName := "example_runs_total"
c := GetOrCreateCounter(metricName)
c2 := GetOrCreateCounter(metricName)
if !isMetricRegistered(metricName) {
t.Errorf("Expected metric %s to be present", metricName)
}
c.Unregister()
// metric should still be registered since c2 is using it
if !isMetricRegistered(metricName) {
t.Errorf("Expected metric %s to be present", metricName)
}
c2.Unregister()
if isMetricRegistered(metricName) {
t.Errorf("Expected metric %s to be unregistered", metricName)
}
}

View File

@@ -29,6 +29,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui) for [VictoriaMetrics enterprise](https://docs.victoriametrics.com/enterprise.html) components: properly display enterprise features when the enterprise version is used.
* BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and [vmselect](https://docs.victoriametrics.com/cluster-victoriametrics/): fix discrepancies when using `or` binary operator. See [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7759) and [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7640) issues for details.
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): properly update number of unique series for [cardinality limiter](https://docs.victoriametrics.com/#cardinality-limiter) on ingestion. Previously, limit could undercount the real number of the ingested unique series.
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert/): do not unregister group metrics if the group is still in use. Previously, this could lead to group metrics being absent even though rules group is still running. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8229) for details.
## [v1.102.12](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.102.12)