mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-05-17 00:26:36 +03:00
lib/protoparser/prometheus: adds Prometheus 3.0 utf-8 quoted syntax support
This commit adds proper parsing of the Prometheus 3.0 text exposition format, which introduces a quoted syntax for metric names and label names containing `utf-8` characters. See the following doc: https://github.com/prometheus/proposals/blob/main/proposals/2023-08-21-utf8.md#syntax-examples Related PR: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8692
This commit is contained in:
@@ -25,6 +25,7 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
|
||||
* FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmrestore](https://docs.victoriametrics.com/vmrestore/), [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager/): cancel currently running operation if graceful shutdown was requested. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8554).
|
||||
* FEATURE: [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager/): display completion status in `/api/v1/backups` API response and `backup list` command. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5361).
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): add `vm_retention_filters_partitions_scheduled` and `vm_retention_filters_partitions_scheduled_size_bytes` gauge metrics to reflect [retention filters](https://docs.victoriametrics.com/#retention-filters) process.
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/single-server-victoriametrics/), [vmagent](https://docs.victoriametrics.com/vmagent/): add support for Prometheus 3.0 utf-8 quoted labels during scraping. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8692) for details.
|
||||
|
||||
* BUGFIX: [vmalert-tool](https://docs.victoriametrics.com/victoriametrics/vmalert-tool/): fix parsing for (+/-)Inf values and scientific notation in `values` field. Thanks to @evkuzin for [#8847](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8847).
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): use `retentionFilter` flag name in debugging interface to make it consistent with flag definition. Previously, flag name in debugging interface was different from command-line configuration so copying command-line flags for debugging produced an error. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8697).
|
||||
|
||||
@@ -238,7 +238,8 @@ func TestNewLabelsFromStringFailure(t *testing.T) {
|
||||
f(`foo{bar="baz`)
|
||||
f(`foo{bar="baz"`)
|
||||
f(`foo{bar="baz",`)
|
||||
f(`foo{"bar"="baz"}`)
|
||||
// This will no longer fail with support of Prometheus 3.0 quoted UTF8 labels
|
||||
//f(`foo{"bar"="baz"}`)
|
||||
f(`{"bar":"baz"}`)
|
||||
f(`{bar:"baz"}`)
|
||||
f(`{bar=~"baz"}`)
|
||||
|
||||
@@ -7,9 +7,10 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/valyala/fastjson/fastfloat"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// Rows contains parsed Prometheus rows.
|
||||
@@ -113,7 +114,7 @@ func (r *Row) unmarshal(s string, tagsPool []Tag, noEscapes bool) ([]Tag, error)
|
||||
s = s[n+1:]
|
||||
tagsStart := len(tagsPool)
|
||||
var err error
|
||||
s, tagsPool, err = unmarshalTags(tagsPool, s, noEscapes)
|
||||
s, tagsPool, err = r.unmarshalTags(tagsPool, s, noEscapes)
|
||||
if err != nil {
|
||||
return tagsPool, fmt.Errorf("cannot unmarshal tags: %w", err)
|
||||
}
|
||||
@@ -230,43 +231,91 @@ func unmarshalRow(dst []Row, s string, tagsPool []Tag, noEscapes bool, errLogger
|
||||
|
||||
var invalidLines = metrics.NewCounter(`vm_rows_invalid_total{type="prometheus"}`)
|
||||
|
||||
func unmarshalTags(dst []Tag, s string, noEscapes bool) (string, []Tag, error) {
|
||||
// unmarshalQuotedString parses quoted string tags
|
||||
// prometheus added support of utf-8 encoding for the text exposition format
|
||||
// https://github.com/prometheus/proposals/blob/main/proposals/2023-08-21-utf8.md#syntax-examples
|
||||
func unmarshalQuotedString(s string, noEscapes bool) (string, string, error) {
|
||||
if len(s) == 0 || s[0] != '"' {
|
||||
return "", s, fmt.Errorf("missing starting double quote in string: %q", s)
|
||||
}
|
||||
var n int
|
||||
if noEscapes {
|
||||
n = strings.IndexByte(s[1:], '"')
|
||||
if n == -1 {
|
||||
return "", s, fmt.Errorf("missing closing double quote in string: %q", s)
|
||||
}
|
||||
// Add 2 to account for both quotes
|
||||
return s[1 : n+1], s[n+2:], nil
|
||||
}
|
||||
n = findClosingQuote(s)
|
||||
if n == -1 {
|
||||
return "", s, fmt.Errorf("missing closing double quote in string: %q", s)
|
||||
}
|
||||
return unescapeValue(s[1:n]), s[n+1:], nil
|
||||
}
|
||||
|
||||
func (r *Row) unmarshalTags(dst []Tag, s string, noEscapes bool) (string, []Tag, error) {
|
||||
var err error
|
||||
for {
|
||||
s = skipLeadingWhitespace(s)
|
||||
if len(s) > 0 && s[0] == '}' {
|
||||
// End of tags found.
|
||||
return s[1:], dst, nil
|
||||
}
|
||||
n := strings.IndexByte(s, '=')
|
||||
n := strings.IndexByte(s, '"')
|
||||
if n < 0 {
|
||||
// end of tags
|
||||
if len(s) > 0 && s[0] == '}' {
|
||||
return s[1:], dst, nil
|
||||
}
|
||||
return s, dst, fmt.Errorf("missing value for tag %q", s)
|
||||
}
|
||||
key := skipTrailingWhitespace(s[:n])
|
||||
if strings.IndexByte(key, '"') >= 0 {
|
||||
return s, dst, fmt.Errorf("tag key %q cannot contain double quotes", key)
|
||||
}
|
||||
s = skipLeadingWhitespace(s[n+1:])
|
||||
if len(s) == 0 || s[0] != '"' {
|
||||
return s, dst, fmt.Errorf("expecting quoted value for tag %q; got %q", key, s)
|
||||
}
|
||||
value := s[1:]
|
||||
if noEscapes {
|
||||
// Fast path - the line has no escape chars
|
||||
n = strings.IndexByte(value, '"')
|
||||
if n < 0 {
|
||||
return s, dst, fmt.Errorf("missing closing quote for tag value %q", s)
|
||||
// Determine if this is a value or quoted label
|
||||
possibleKey := skipTrailingWhitespace(s[:n])
|
||||
possibleKeyLen := len(possibleKey)
|
||||
key := ""
|
||||
if possibleKeyLen == 0 {
|
||||
// Parse quoted label - {"label"="value"} or {"metric"}
|
||||
key, s, err = unmarshalQuotedString(s, noEscapes)
|
||||
if err != nil {
|
||||
return s, dst, err
|
||||
}
|
||||
s = value[n+1:]
|
||||
value = value[:n]
|
||||
s = skipLeadingWhitespace(s)
|
||||
if len(s) > 0 {
|
||||
if s[0] == ',' || s[0] == '}' {
|
||||
// quoted metric name {"metric_name"}
|
||||
if r.Metric != "" {
|
||||
return s, dst, fmt.Errorf("metric name %q already set, duplicate metric name %q", r.Metric, key)
|
||||
}
|
||||
r.Metric = key
|
||||
if len(s) > 1 && s[0] == ',' {
|
||||
s = s[1:]
|
||||
}
|
||||
continue
|
||||
} else if s[0] != '=' {
|
||||
// We are a quoted label that isn't preceded by a comma or at the end
|
||||
// of the tags so we must have a value
|
||||
return s, dst, fmt.Errorf("missing value for quoted tag %q", key)
|
||||
}
|
||||
s = skipLeadingWhitespace(s[1:])
|
||||
}
|
||||
// Fall through to parsing value
|
||||
} else {
|
||||
// Slow path - the line contains escape chars
|
||||
n = findClosingQuote(s)
|
||||
if n < 0 {
|
||||
return s, dst, fmt.Errorf("missing closing quote for tag value %q", s)
|
||||
c := possibleKey[len(possibleKey)-1]
|
||||
// unquoted label {label="value"}
|
||||
if c == '=' {
|
||||
// Parse unquoted label
|
||||
key = skipLeadingWhitespace(s[:possibleKeyLen-1])
|
||||
key = skipTrailingWhitespace(key)
|
||||
s = skipLeadingWhitespace(s[possibleKeyLen:])
|
||||
} else {
|
||||
// unquoted tag without a value
|
||||
return s, dst, fmt.Errorf("missing value for unquoted tag %q", s)
|
||||
}
|
||||
value = unescapeValue(s[1:n])
|
||||
s = s[n+1:]
|
||||
}
|
||||
// Parse value
|
||||
var value string
|
||||
value, s, err = unmarshalQuotedString(s, noEscapes)
|
||||
if err != nil {
|
||||
return s, dst, err
|
||||
}
|
||||
|
||||
if len(key) > 0 {
|
||||
// Allow empty values (len(value)==0) - see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/453
|
||||
if cap(dst) > len(dst) {
|
||||
|
||||
@@ -221,6 +221,12 @@ func TestRowsUnmarshalFailure(t *testing.T) {
|
||||
f(`a{"__name__":"upsd_time_left_ns","host":"myhost", status_OB="true"} 12`)
|
||||
f(`a{host:"myhost"} 12`)
|
||||
f(`a{host:"myhost",foo="bar"} 12`)
|
||||
// invalid quoted UTF8 tags
|
||||
f(`metric_"name"{"foo"="bar"}`)
|
||||
f(`"metric_name"{"name":"name}`)
|
||||
f(`metric_"name{"name":"name"}`)
|
||||
f(`metric{"foo":"bar"}`)
|
||||
f(`{"foo":"bar", "metric"}`)
|
||||
|
||||
// empty metric name
|
||||
f(`{foo="bar"}`)
|
||||
@@ -242,6 +248,13 @@ func TestRowsUnmarshalFailure(t *testing.T) {
|
||||
|
||||
// Invalid timestamp
|
||||
f("foo 123 bar")
|
||||
// metric name defined multiple times
|
||||
f(`{"foo", "foo2", bar="baz"} 1 2`)
|
||||
f(`foobar{"foo", bar="baz"} 1 2`)
|
||||
// missing closing quotes on key
|
||||
f(`{"a", "b = "c"}`)
|
||||
// empty metric name
|
||||
f(`{"a"="ok"} 1`)
|
||||
}
|
||||
|
||||
func TestRowsUnmarshalSuccess(t *testing.T) {
|
||||
@@ -466,6 +479,77 @@ cassandra_token_ownership_ratio 78.9`, &Rows{
|
||||
Timestamp: 2000,
|
||||
}},
|
||||
})
|
||||
// UTF8 Quoted tags
|
||||
f(`foo{"bar"="baz"} 1 2`, &Rows{
|
||||
Rows: []Row{{
|
||||
Metric: "foo",
|
||||
Tags: []Tag{{
|
||||
Key: "bar",
|
||||
Value: "baz",
|
||||
}},
|
||||
Value: 1,
|
||||
Timestamp: 2000,
|
||||
}},
|
||||
})
|
||||
f(`{"foo", "bar"="baz"} 1 2`, &Rows{
|
||||
Rows: []Row{{
|
||||
Metric: "foo",
|
||||
Tags: []Tag{{
|
||||
Key: "bar",
|
||||
Value: "baz",
|
||||
}},
|
||||
Value: 1,
|
||||
Timestamp: 2000,
|
||||
}},
|
||||
})
|
||||
f(`{"foo", "bar"="baf\"y"} 1 2`, &Rows{
|
||||
Rows: []Row{{
|
||||
Metric: "foo",
|
||||
Tags: []Tag{{
|
||||
Key: "bar",
|
||||
Value: `baf"y`,
|
||||
}},
|
||||
Value: 1,
|
||||
Timestamp: 2000,
|
||||
}},
|
||||
})
|
||||
f(`{bar="baz", "foo"} 1 2`, &Rows{
|
||||
Rows: []Row{{
|
||||
Metric: "foo",
|
||||
Tags: []Tag{{
|
||||
Key: "bar",
|
||||
Value: "baz",
|
||||
}},
|
||||
Value: 1,
|
||||
Timestamp: 2000,
|
||||
}},
|
||||
})
|
||||
f(`{"foo"} 1 2`, &Rows{
|
||||
Rows: []Row{{
|
||||
Metric: "foo",
|
||||
Value: 1,
|
||||
Timestamp: 2000,
|
||||
}},
|
||||
})
|
||||
// Special character quoted UTF8 tests
|
||||
f(`{"温度{房间"} 1 2`, &Rows{
|
||||
Rows: []Row{{
|
||||
Metric: "温度{房间",
|
||||
Value: 1,
|
||||
Timestamp: 2000,
|
||||
}},
|
||||
})
|
||||
f(`{"foo", "温度{房间=\"水电费"="baz"} 1 2`, &Rows{
|
||||
Rows: []Row{{
|
||||
Metric: "foo",
|
||||
Tags: []Tag{{
|
||||
Key: `温度{房间="水电费`,
|
||||
Value: "baz",
|
||||
}},
|
||||
Value: 1,
|
||||
Timestamp: 2000,
|
||||
}},
|
||||
})
|
||||
f(`foo{bar="b\"a\\z"} -1.2`, &Rows{
|
||||
Rows: []Row{{
|
||||
Metric: "foo",
|
||||
|
||||
Reference in New Issue
Block a user