mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-06-17 15:53:29 +03:00
Compare commits
9 Commits
sso2
...
vmestimato
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
24ae4758f2 | ||
|
|
b94dd42126 | ||
|
|
54768be72d | ||
|
|
fe1f2b7c2a | ||
|
|
4f27d60563 | ||
|
|
3d4e8b59fd | ||
|
|
b58c73ac90 | ||
|
|
77efbb2e36 | ||
|
|
e388e41430 |
@@ -209,13 +209,12 @@ func (wr *writeRequest) tryPushMetadata(mms []prompb.MetricMetadata) bool {
|
||||
func (wr *writeRequest) copyMetadata(dst, src *prompb.MetricMetadata) {
|
||||
// Direct copy for non-string fields, which are safe by value.
|
||||
dst.Type = src.Type
|
||||
dst.Unit = src.Unit
|
||||
|
||||
dst.AccountID = src.AccountID
|
||||
dst.ProjectID = src.ProjectID
|
||||
|
||||
// Pre-allocate memory for all string fields.
|
||||
neededBufLen := len(src.MetricFamilyName) + len(src.Help)
|
||||
neededBufLen := len(src.MetricFamilyName) + len(src.Help) + len(src.Unit)
|
||||
bufLen := len(wr.metadatabuf)
|
||||
wr.metadatabuf = slicesutil.SetLength(wr.metadatabuf, bufLen+neededBufLen)
|
||||
buf := wr.metadatabuf[:bufLen]
|
||||
@@ -230,6 +229,11 @@ func (wr *writeRequest) copyMetadata(dst, src *prompb.MetricMetadata) {
|
||||
buf = append(buf, src.Help...)
|
||||
dst.Help = bytesutil.ToUnsafeString(buf[bufLen:])
|
||||
|
||||
// Copy Unit
|
||||
bufLen = len(buf)
|
||||
buf = append(buf, src.Unit...)
|
||||
dst.Unit = bytesutil.ToUnsafeString(buf[bufLen:])
|
||||
|
||||
wr.metadatabuf = buf
|
||||
}
|
||||
|
||||
|
||||
@@ -911,7 +911,8 @@ func reloadAuthConfigData(data []byte) (bool, error) {
|
||||
return false, fmt.Errorf("failed to parse auth config: %w", err)
|
||||
}
|
||||
|
||||
jui, oidcDP, err := parseJWTUsers(ac)
|
||||
oidcDP := &oidcDiscovererPool{}
|
||||
jui, err := parseJWTUsers(ac, oidcDP)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to parse JWT users from auth config: %w", err)
|
||||
}
|
||||
|
||||
@@ -72,9 +72,8 @@ type JWTConfig struct {
|
||||
verifierPool atomic.Pointer[jwt.VerifierPool]
|
||||
}
|
||||
|
||||
func parseJWTUsers(ac *AuthConfig) ([]*UserInfo, *oidcDiscovererPool, error) {
|
||||
func parseJWTUsers(ac *AuthConfig, oidcDP *oidcDiscovererPool) ([]*UserInfo, error) {
|
||||
jui := make([]*UserInfo, 0, len(ac.Users))
|
||||
oidcDP := &oidcDiscovererPool{}
|
||||
|
||||
uniqClaims := make(map[string]*UserInfo)
|
||||
var sortedClaims []string
|
||||
@@ -85,10 +84,10 @@ func parseJWTUsers(ac *AuthConfig) ([]*UserInfo, *oidcDiscovererPool, error) {
|
||||
}
|
||||
|
||||
if ui.AuthToken != "" || ui.BearerToken != "" || ui.Username != "" || ui.Password != "" {
|
||||
return nil, nil, fmt.Errorf("auth_token, bearer_token, username and password cannot be specified if jwt is set")
|
||||
return nil, fmt.Errorf("auth_token, bearer_token, username and password cannot be specified if jwt is set")
|
||||
}
|
||||
if len(jwtToken.PublicKeys) == 0 && len(jwtToken.PublicKeyFiles) == 0 && !jwtToken.SkipVerify && jwtToken.OIDC == nil {
|
||||
return nil, nil, fmt.Errorf("jwt must contain at least a single public key, public_key_files, oidc or have skip_verify=true")
|
||||
return nil, fmt.Errorf("jwt must contain at least a single public key, public_key_files, oidc or have skip_verify=true")
|
||||
}
|
||||
var claimsString string
|
||||
sortedClaims = sortedClaims[:0]
|
||||
@@ -97,7 +96,7 @@ func parseJWTUsers(ac *AuthConfig) ([]*UserInfo, *oidcDiscovererPool, error) {
|
||||
sortedClaims = append(sortedClaims, fmt.Sprintf("%s=%s", ck, cv))
|
||||
pc, err := jwt.NewClaim(ck, cv)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("incorrect match claim, key=%q, value regex=%q: %w", ck, cv, err)
|
||||
return nil, fmt.Errorf("incorrect match claim, key=%q, value regex=%q: %w", ck, cv, err)
|
||||
}
|
||||
parsedClaims = append(parsedClaims, pc)
|
||||
}
|
||||
@@ -106,7 +105,7 @@ func parseJWTUsers(ac *AuthConfig) ([]*UserInfo, *oidcDiscovererPool, error) {
|
||||
claimsString = strings.Join(sortedClaims, ",")
|
||||
|
||||
if oldUI, ok := uniqClaims[claimsString]; ok {
|
||||
return nil, nil, fmt.Errorf("duplicate match claims=%q found for name=%q at idx=%d; the previous one is set for name=%q", claimsString, ui.Name, idx, oldUI.Name)
|
||||
return nil, fmt.Errorf("duplicate match claims=%q found for name=%q at idx=%d; the previous one is set for name=%q", claimsString, ui.Name, idx, oldUI.Name)
|
||||
}
|
||||
uniqClaims[claimsString] = &ui
|
||||
if len(jwtToken.PublicKeys) > 0 || len(jwtToken.PublicKeyFiles) > 0 {
|
||||
@@ -115,7 +114,7 @@ func parseJWTUsers(ac *AuthConfig) ([]*UserInfo, *oidcDiscovererPool, error) {
|
||||
for i := range jwtToken.PublicKeys {
|
||||
k, err := jwt.ParseKey([]byte(jwtToken.PublicKeys[i]))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
return nil, err
|
||||
}
|
||||
keys = append(keys, k)
|
||||
}
|
||||
@@ -123,52 +122,52 @@ func parseJWTUsers(ac *AuthConfig) ([]*UserInfo, *oidcDiscovererPool, error) {
|
||||
for _, filePath := range jwtToken.PublicKeyFiles {
|
||||
keyData, err := os.ReadFile(filePath)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("cannot read public key from file %q: %w", filePath, err)
|
||||
return nil, fmt.Errorf("cannot read public key from file %q: %w", filePath, err)
|
||||
}
|
||||
k, err := jwt.ParseKey(keyData)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("cannot parse public key from file %q: %w", filePath, err)
|
||||
return nil, fmt.Errorf("cannot parse public key from file %q: %w", filePath, err)
|
||||
}
|
||||
keys = append(keys, k)
|
||||
}
|
||||
|
||||
vp, err := jwt.NewVerifierPool(keys)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
jwtToken.verifierPool.Store(vp)
|
||||
}
|
||||
if jwtToken.OIDC != nil {
|
||||
if len(jwtToken.PublicKeys) > 0 || len(jwtToken.PublicKeyFiles) > 0 || jwtToken.SkipVerify {
|
||||
return nil, nil, fmt.Errorf("jwt with oidc cannot contain public keys or have skip_verify=true")
|
||||
return nil, fmt.Errorf("jwt with oidc cannot contain public keys or have skip_verify=true")
|
||||
}
|
||||
|
||||
if jwtToken.OIDC.Issuer == "" {
|
||||
return nil, nil, fmt.Errorf("oidc issuer cannot be empty")
|
||||
return nil, fmt.Errorf("oidc issuer cannot be empty")
|
||||
}
|
||||
isserURL, err := url.Parse(jwtToken.OIDC.Issuer)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("oidc issuer %q must be a valid URL", jwtToken.OIDC.Issuer)
|
||||
return nil, fmt.Errorf("oidc issuer %q must be a valid URL", jwtToken.OIDC.Issuer)
|
||||
}
|
||||
if isserURL.Scheme != "https" && isserURL.Scheme != "http" {
|
||||
return nil, nil, fmt.Errorf("oidc issuer %q must have http or https scheme", jwtToken.OIDC.Issuer)
|
||||
return nil, fmt.Errorf("oidc issuer %q must have http or https scheme", jwtToken.OIDC.Issuer)
|
||||
}
|
||||
|
||||
oidcDP.createOrAdd(ui.JWT.OIDC.Issuer, &ui.JWT.verifierPool)
|
||||
}
|
||||
|
||||
if err := parseJWTPlaceholdersForUserInfo(&ui, true); err != nil {
|
||||
return nil, nil, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := ui.initURLs(); err != nil {
|
||||
return nil, nil, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
metricLabels, err := ui.getMetricLabels()
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("cannot parse metric_labels: %w", err)
|
||||
return nil, fmt.Errorf("cannot parse metric_labels: %w", err)
|
||||
}
|
||||
ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
|
||||
ui.requestErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_errors_total` + metricLabels)
|
||||
@@ -187,7 +186,7 @@ func parseJWTUsers(ac *AuthConfig) ([]*UserInfo, *oidcDiscovererPool, error) {
|
||||
|
||||
rt, err := newRoundTripper(ui.TLSCAFile, ui.TLSCertFile, ui.TLSKeyFile, ui.TLSServerName, ui.TLSInsecureSkipVerify)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("cannot initialize HTTP RoundTripper: %w", err)
|
||||
return nil, fmt.Errorf("cannot initialize HTTP RoundTripper: %w", err)
|
||||
}
|
||||
ui.rt = rt
|
||||
|
||||
@@ -200,7 +199,7 @@ func parseJWTUsers(ac *AuthConfig) ([]*UserInfo, *oidcDiscovererPool, error) {
|
||||
return len(jui[i].JWT.MatchClaims) > len(jui[j].JWT.MatchClaims)
|
||||
})
|
||||
|
||||
return jui, oidcDP, nil
|
||||
return jui, nil
|
||||
}
|
||||
|
||||
var tokenPool sync.Pool
|
||||
|
||||
@@ -39,16 +39,14 @@ XOtclIk1uhc03oL9nOQ=
|
||||
}
|
||||
return
|
||||
}
|
||||
users, oidcDP, err := parseJWTUsers(ac)
|
||||
oidcDP := &oidcDiscovererPool{}
|
||||
users, err := parseJWTUsers(ac, oidcDP)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error; got %v", users)
|
||||
}
|
||||
if expErr != err.Error() {
|
||||
t.Fatalf("unexpected error; got\n%q\nwant \n%q", err.Error(), expErr)
|
||||
}
|
||||
if oidcDP != nil {
|
||||
t.Fatalf("expecting nil oidcDP; got %v", oidcDP)
|
||||
}
|
||||
}
|
||||
|
||||
// unauthorized_user cannot be used with jwt
|
||||
@@ -326,7 +324,8 @@ XOtclIk1uhc03oL9nOQ=
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
jui, oidcDP, err := parseJWTUsers(ac)
|
||||
oidcDP := &oidcDiscovererPool{}
|
||||
jui, err := parseJWTUsers(ac, oidcDP)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
49
app/vmestimator/cardinality_metrics.go
Normal file
49
app/vmestimator/cardinality_metrics.go
Normal file
@@ -0,0 +1,49 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"flag"
|
||||
"io"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
	// Self-monitoring counters updated by writeCardinalityMetrics on every call:
	// number of calls, total time spent and total number of bytes handed to the writer.
	cardinalityMetricsWrites        = metrics.NewCounter(`vmestimator_write_cardinality_metrics_total`)
	cardinalityMetricsWriteDuration = metrics.NewFloatCounter(`vmestimator_write_cardinality_metrics_duration_seconds_total`)
	cardinalityMetricsWriteBytes    = metrics.NewCounter(`vmestimator_write_cardinality_metrics_size_bytes_total`)

	// cardinalityCacheMu is held by writeCardinalityMetrics while it reads or
	// refreshes cardinalityMetricsCacheAt and cardinalityMetricsCache.
	cardinalityCacheMu sync.Mutex
	// cardinalityMetricsCacheAt is the time when cardinalityMetricsCache was rendered last.
	cardinalityMetricsCacheAt time.Time
	// cardinalityMetricsCache holds the most recently rendered cardinality metrics.
	cardinalityMetricsCache []byte
	// cardinalityMetricsCacheTTL controls how long the rendered metrics are reused;
	// zero makes writeCardinalityMetrics re-render on every call.
	cardinalityMetricsCacheTTL = flag.Duration("cardinalityMetrics.cacheTTL", time.Second*30, "Duration for caching cardinality metrics response")
	// cardinalityMetricsExposeAt is not read in this file; see the flag description for its meaning.
	cardinalityMetricsExposeAt = flag.String(`cardinalityMetrics.exposeAt`, `/metrics`, "HTTP path for exposing cardinality metrics. "+
		"If set to the default /metrics, cardinality metrics are merged with regular metrics and exposed together. "+
		"If set to a different path, only cardinality metrics are exposed at that endpoint. "+
		"If set to an empty value, cardinality metrics are not exposed via HTTP at all.")
)
|
||||
|
||||
func writeCardinalityMetrics(w io.Writer, es []*estimator) {
|
||||
startTime := time.Now()
|
||||
|
||||
cardinalityCacheMu.Lock()
|
||||
if time.Since(cardinalityMetricsCacheAt) >= *cardinalityMetricsCacheTTL || *cardinalityMetricsCacheTTL == 0 {
|
||||
plain := bytes.NewBuffer(cardinalityMetricsCache[:0])
|
||||
for _, e := range es {
|
||||
e.writeMetrics(plain)
|
||||
}
|
||||
cardinalityMetricsCache = plain.Bytes()
|
||||
cardinalityMetricsCacheAt = time.Now()
|
||||
}
|
||||
cm := make([]byte, len(cardinalityMetricsCache))
|
||||
copy(cm, cardinalityMetricsCache)
|
||||
cardinalityCacheMu.Unlock()
|
||||
|
||||
_, _ = w.Write(cm)
|
||||
|
||||
cardinalityMetricsWrites.Inc()
|
||||
cardinalityMetricsWriteDuration.Add(time.Since(startTime).Seconds())
|
||||
cardinalityMetricsWriteBytes.Add(len(cm))
|
||||
}
|
||||
43
app/vmestimator/config.go
Normal file
43
app/vmestimator/config.go
Normal file
@@ -0,0 +1,43 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"gopkg.in/yaml.v2"
|
||||
)
|
||||
|
||||
// Config is the root of the YAML configuration parsed by loadConfig.
type Config struct {
	// Streams lists the estimator configurations, one entry per stream.
	Streams []EstimatorConfig `yaml:"streams"`
}
|
||||
|
||||
// EstimatorConfig describes a single cardinality estimator.
//
// Zero values are replaced with defaults by newEstimator.
type EstimatorConfig struct {
	// GroupBy lists label names used for grouping; loadConfig sorts it in place.
	// An empty list results in a single global estimate.
	GroupBy []string `yaml:"group_by"`
	// GroupLimit caps the number of tracked groups; non-positive values default to 10000.
	GroupLimit int `yaml:"group_limit"`
	// Labels are extra labels added to the exposed cardinality_estimate metric.
	Labels map[string]string `yaml:"labels"`
	// Interval is the estimation window; zero defaults to 5 minutes.
	// Sketches are rotated every Interval/2.
	Interval time.Duration `yaml:"interval"`
	// Buckets is the number of independently locked shards;
	// non-positive values default to min(64, 2*available CPUs).
	Buckets int `yaml:"buckets"`
	// HLLPrecision is the HyperLogLog precision; zero defaults to 14.
	// loadConfig rejects non-zero values outside [4, 18].
	HLLPrecision uint8 `yaml:"hll_precision"`
	// HLLSparse is passed to hyperloglog.NewSketch as the sparse argument; nil defaults to true.
	HLLSparse *bool `yaml:"hll_sparse"`
}
|
||||
|
||||
func loadConfig(path string) (*Config, error) {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read config file %q: %w", path, err)
|
||||
}
|
||||
var cfg Config
|
||||
if err := yaml.UnmarshalStrict(data, &cfg); err != nil {
|
||||
return nil, fmt.Errorf("cannot parse config file %q: %w", path, err)
|
||||
}
|
||||
for _, stream := range cfg.Streams {
|
||||
sort.Strings(stream.GroupBy)
|
||||
if stream.HLLPrecision != 0 && (stream.HLLPrecision < 4 || stream.HLLPrecision > 18) {
|
||||
return nil, fmt.Errorf("invalid precision %d: must be in range [4, 18]", stream.HLLPrecision)
|
||||
}
|
||||
}
|
||||
|
||||
return &cfg, nil
|
||||
}
|
||||
508
app/vmestimator/estimator.go
Normal file
508
app/vmestimator/estimator.go
Normal file
@@ -0,0 +1,508 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/axiomhq/hyperloglog"
|
||||
"github.com/dgryski/go-metro"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmestimator/protoparser"
|
||||
)
|
||||
|
||||
// estimator estimates time series cardinality, optionally per group of label values.
//
// Inserted series are spread across buckets, so concurrent inserts contend on
// per-bucket locks instead of a single one.
type estimator struct {
	// groupBy lists the label names used for grouping; empty means a single global estimate.
	groupBy []string
	// groupByKeysLabel is the comma-joined groupBy, or "__global__" when groupBy is empty.
	groupByKeysLabel string

	// groupLimit is the maximum number of groups tracked across all the buckets.
	groupLimit int64
	// groupSize is the shared counter of tracked groups; buckets update it via a pointer.
	groupSize atomic.Int64
	// groupRejectedMu protects groupRejectedSketch and groupRejectedSketchPrev.
	groupRejectedMu sync.Mutex
	// groupRejectedSketch is handed to every bucket in newEstimator; buckets feed it
	// with hashes of group keys rejected because of groupLimit.
	groupRejectedSketch *hyperloglog.Sketch
	// groupRejectedSketchPrev is merged with groupRejectedSketch in writeMetrics.
	groupRejectedSketchPrev *hyperloglog.Sketch

	// buckets are the shards holding the actual sketches.
	buckets []*estimatorBucket

	// metricsSet holds self-monitoring metrics of this estimator.
	metricsSet *metrics.Set
	// insertTotal counts the series passed to buckets by insertMany.
	insertTotal *metrics.Counter

	// stopCh is closed by stop() in order to terminate runRotation.
	stopCh chan struct{}
}
|
||||
|
||||
func newEstimator(cfg EstimatorConfig) (*estimator, error) {
|
||||
if cfg.Interval == 0 {
|
||||
cfg.Interval = time.Minute * 5
|
||||
}
|
||||
if cfg.GroupLimit <= 0 {
|
||||
cfg.GroupLimit = 10000
|
||||
}
|
||||
if cfg.Buckets <= 0 {
|
||||
cfg.Buckets = min(64, 2*cgroup.AvailableCPUs())
|
||||
}
|
||||
if cfg.HLLPrecision == 0 {
|
||||
cfg.HLLPrecision = 14
|
||||
}
|
||||
if cfg.HLLSparse == nil {
|
||||
cfg.HLLSparse = new(true)
|
||||
}
|
||||
|
||||
metricPrefix := fmt.Sprintf("cardinality_estimate{interval=%q", cfg.Interval)
|
||||
if len(cfg.Labels) > 0 {
|
||||
keys := make([]string, 0, len(cfg.Labels))
|
||||
for k := range cfg.Labels {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
for _, k := range keys {
|
||||
metricPrefix += fmt.Sprintf(",%s=%q", k, cfg.Labels[k])
|
||||
}
|
||||
}
|
||||
|
||||
groupByKeysLabel := "__global__"
|
||||
if len(cfg.GroupBy) > 0 {
|
||||
groupByKeysLabel = strings.Join(cfg.GroupBy, `,`)
|
||||
}
|
||||
|
||||
e := &estimator{
|
||||
groupBy: cfg.GroupBy,
|
||||
groupByKeysLabel: groupByKeysLabel,
|
||||
groupLimit: int64(cfg.GroupLimit),
|
||||
groupRejectedSketch: mustNewGroupRejectSketch(),
|
||||
groupRejectedSketchPrev: mustNewGroupRejectSketch(),
|
||||
buckets: make([]*estimatorBucket, cfg.Buckets),
|
||||
metricsSet: metrics.NewSet(),
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
|
||||
e.insertTotal = e.metricsSet.NewCounter(
|
||||
fmt.Sprintf(`vmestimator_estimator_insert_total{group_by_keys=%q}`, e.groupByKeysLabel),
|
||||
)
|
||||
e.metricsSet.NewGauge(fmt.Sprintf(`vmestimator_estimator_group_rejected_size{group_by_keys=%q}`, e.groupByKeysLabel), func() float64 {
|
||||
e.groupRejectedMu.Lock()
|
||||
defer e.groupRejectedMu.Unlock()
|
||||
return float64(e.groupRejectedSketch.Estimate())
|
||||
})
|
||||
|
||||
for i := 0; i < len(e.buckets); i++ {
|
||||
eb := &estimatorBucket{
|
||||
groupBy: cfg.GroupBy,
|
||||
extraLabels: cfg.Labels,
|
||||
interval: cfg.Interval,
|
||||
metricPrefix: metricPrefix,
|
||||
groupByKeysLabel: groupByKeysLabel,
|
||||
groupLimit: int64(cfg.GroupLimit),
|
||||
groupSize: &e.groupSize,
|
||||
groupRejectedMu: &e.groupRejectedMu,
|
||||
groupRejectedSketch: e.groupRejectedSketch,
|
||||
|
||||
precision: cfg.HLLPrecision,
|
||||
sparse: *cfg.HLLSparse,
|
||||
}
|
||||
|
||||
if len(cfg.GroupBy) == 0 {
|
||||
eb.sketch = eb.newSketch()
|
||||
} else {
|
||||
eb.groups = make(map[string]groupSketch)
|
||||
eb.prevGroups = make(map[string]groupSketch)
|
||||
|
||||
e.metricsSet.NewGauge(fmt.Sprintf(`vmestimator_estimator_group_size{group_by_keys=%q,bucket="%d"}`, eb.groupByKeysLabel, i), func() float64 {
|
||||
return float64(eb.groupSize.Load())
|
||||
})
|
||||
e.metricsSet.NewGauge(fmt.Sprintf(`vmestimator_estimator_group_limit{group_by_keys=%q,bucket="%d"}`, eb.groupByKeysLabel, i), func() float64 {
|
||||
return float64(eb.groupLimit)
|
||||
})
|
||||
}
|
||||
|
||||
e.buckets[i] = eb
|
||||
}
|
||||
|
||||
go e.runRotation(cfg.Interval)
|
||||
|
||||
metrics.RegisterSet(e.metricsSet)
|
||||
|
||||
return e, nil
|
||||
}
|
||||
|
||||
// stop closes stopCh, which makes runRotation return, and unregisters
// all the metrics from the estimator's metrics set.
//
// It must be called at most once, since closing an already closed channel panics.
func (e *estimator) stop() {
	close(e.stopCh)
	e.metricsSet.UnregisterAllMetrics()
}
|
||||
|
||||
// groupValuesPool holds reusable *[]byte scratch buffers used for building group keys.
var groupValuesPool = sync.Pool{}

// getGroupValuesKeySlice returns an empty scratch buffer for building a group key.
//
// The buffer is taken from groupValuesPool when possible. A freshly allocated buffer
// has zero length and spare capacity, the same shape as the buffers returned
// to the pool by putGroupValuesSlice.
func getGroupValuesKeySlice() *[]byte {
	if v := groupValuesPool.Get(); v != nil {
		return v.(*[]byte)
	}

	buf := make([]byte, 0, 128)
	return &buf
}
|
||||
|
||||
func putGroupValuesSlice(key *[]byte) {
|
||||
if key == nil {
|
||||
return
|
||||
}
|
||||
|
||||
*key = (*key)[:0]
|
||||
groupValuesPool.Put(key)
|
||||
}
|
||||
|
||||
// insertMany registers the given time series in the estimator.
//
// Without groupBy every series goes to the bucket selected by its fingerprint.
// With groupBy the series goes to the bucket selected by the hash of its group key,
// which is the comma-joined values of the groupBy labels (a missing label contributes
// an empty value). Series that have none of the groupBy labels are skipped.
//
// insertTotal is increased by the number of series passed to buckets, including
// the ones a bucket may reject because of the group limit.
func (e *estimator) insertMany(tss []protoparser.TimeSerie) {
	bucketsNum := uint64(len(e.buckets))

	// Borrow a scratch buffer for group keys. The deferred func stores the possibly
	// re-allocated slice back, so the grown capacity is kept in the pool.
	groupValuesKeyP := getGroupValuesKeySlice()
	groupValuesKey := *groupValuesKeyP
	defer func() {
		*groupValuesKeyP = groupValuesKey
		putGroupValuesSlice(groupValuesKeyP)
	}()

	// groupValues[i] holds the value of the e.groupBy[i] label for the current series.
	groupValues := make([]string, len(e.groupBy))

	var cnt int
	for _, ts := range tss {
		if len(e.groupBy) == 0 {
			i := int(ts.Fingerprint % bucketsNum)
			e.buckets[i].insert(ts, "", nil)
			cnt++
			continue
		}

		groupValuesKey = groupValuesKey[:0]
		clear(groupValues)
		var hasNames bool
		for i, labelName := range e.groupBy {
			if i > 0 {
				groupValuesKey = append(groupValuesKey, ',')
			}

			for _, l := range ts.GroupLabels {
				if l.Name == labelName {
					hasNames = true

					groupValuesKey = append(groupValuesKey, l.Value...)
					groupValues[i] = l.Value
					break
				}
			}
		}

		// time series does not contribute to this groupBy
		if !hasNames {
			continue
		}

		i := int(hash(groupValuesKey) % bucketsNum)
		// The unsafe string aliases the scratch buffer, which is overwritten on the next
		// iteration; insert clones the key before storing it in its map.
		e.buckets[i].insert(ts, bytesutil.ToUnsafeString(groupValuesKey), groupValues)
		cnt++
	}

	e.insertTotal.Add(cnt)
}
|
||||
|
||||
func (e *estimator) reset() {
|
||||
e.groupSize.Store(0)
|
||||
for _, b := range e.buckets {
|
||||
b.reset()
|
||||
}
|
||||
|
||||
e.groupRejectedMu.Lock()
|
||||
e.groupRejectedSketch.Reset()
|
||||
e.groupRejectedMu.Unlock()
|
||||
}
|
||||
|
||||
// writeMetrics writes cardinality_estimate samples for the estimator to w.
//
// Without groupBy a single sample labeled group_by_keys="__global__" is written; its
// value is the estimate of the sketches merged across all the buckets.
//
// With groupBy one sample per group is written by the buckets, followed by a summary
// sample labeled group_by_keys="__group__" holding the number of tracked groups.
// Once that number reaches 80% of groupLimit, the estimated number of rejected groups
// is added to it.
//
// Write errors are logged and do not stop the function.
func (e *estimator) writeMetrics(w io.Writer) {
	// All the buckets share the same metricPrefix, groupByKeysLabel and sketch settings,
	// so the first bucket is used as their source.
	eb0 := e.buckets[0]

	if len(e.groupBy) == 0 {
		formatBuf := make([]byte, 0, 1024)
		resSK := eb0.newSketch()
		for _, eb := range e.buckets {
			eb.writeNoGroupMetric(resSK)
		}

		formatBuf = append(formatBuf, eb0.metricPrefix...)
		formatBuf = append(formatBuf, `,group_by_keys="__global__"} `...)
		formatBuf = strconv.AppendUint(formatBuf, resSK.Estimate(), 10)
		formatBuf = append(formatBuf, "\n"...)
		if _, err := w.Write(formatBuf); err != nil {
			logger.Errorf("writing metrics failed: %s; written cardinality metrics might be incomplete or invalid", err)
		}
		return
	}

	// Build the part of the sample line shared by all the groups once;
	// buckets append the per-group part after prefixLen.
	formatBuf := make([]byte, 0, 16384)
	formatBuf = append(formatBuf, eb0.metricPrefix...)
	formatBuf = append(formatBuf, `,group_by_keys="`...)
	formatBuf = append(formatBuf, eb0.groupByKeysLabel...)
	formatBuf = append(formatBuf, `",group_by_values=`...)

	prefixLen := len(formatBuf)
	// resSK is a scratch sketch reused by the buckets for every group.
	resSK := eb0.newSketch()
	for _, eb := range e.buckets {
		formatBuf = eb.writeGroupMetrics(w, resSK, formatBuf[:prefixLen])
	}

	groupSize := e.groupSize.Load()
	if groupSize >= int64(float64(e.groupLimit)*0.8) {
		// Merge both rejected-groups sketches into a temporary one under the lock,
		// so the estimate can be calculated after the lock is released.
		e.groupRejectedMu.Lock()
		res := mustNewGroupRejectSketch()
		if err := res.Merge(e.groupRejectedSketch); err != nil {
			logger.Fatalf("BUG: groupRejectedSketch merge failed: %s", err)
		}
		if err := res.Merge(e.groupRejectedSketchPrev); err != nil {
			logger.Fatalf("BUG: groupRejectedSketchPrev merge failed: %s", err)
		}
		e.groupRejectedMu.Unlock()

		groupSize += int64(res.Estimate())
	}

	formatBuf = formatBuf[:0]
	formatBuf = append(formatBuf, eb0.metricPrefix...)
	formatBuf = append(formatBuf, `,group_by_keys="__group__",group_by_values="`...)
	formatBuf = append(formatBuf, eb0.groupByKeysLabel...)
	formatBuf = append(formatBuf, `"} `...)
	formatBuf = strconv.AppendInt(formatBuf, groupSize, 10)
	formatBuf = append(formatBuf, "\n"...)
	if _, err := w.Write(formatBuf); err != nil {
		logger.Errorf("writing metrics failed: %s; written cardinality metrics might be incomplete or invalid", err)
	}
}
|
||||
|
||||
func (e *estimator) runRotation(interval time.Duration) {
|
||||
t := time.NewTicker(interval / 2)
|
||||
defer t.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-t.C:
|
||||
e.rotate()
|
||||
case <-e.stopCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *estimator) rotate() {
|
||||
e.groupSize.Store(0)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for i := range e.buckets {
|
||||
wg.Go(e.buckets[i].rotate)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
e.groupRejectedMu.Lock()
|
||||
prevSK := e.groupRejectedSketchPrev
|
||||
prevSK.Reset()
|
||||
e.groupRejectedSketchPrev = e.groupRejectedSketch
|
||||
e.groupRejectedSketch = prevSK
|
||||
e.groupRejectedMu.Unlock()
|
||||
}
|
||||
|
||||
// estimatorBucket is a single shard of an estimator.
//
// It holds either a pair of global sketches (no grouping) or a pair of per-group
// sketch maps (grouping); the current one receives inserts, the previous one is
// the state moved aside by the last rotate call.
type estimatorBucket struct {
	// mu is held by insert, reset, rotate and the write* methods while they access
	// sketch, prevSketch, groups and prevGroups.
	mu sync.Mutex

	// The fields below are copied from the estimator config in newEstimator.
	groupBy          []string
	groupLimit       int64
	extraLabels      map[string]string
	interval         time.Duration
	metricPrefix     string
	groupByKeysLabel string
	// precision and sparse are the arguments for sketches created by newSketch.
	precision uint8
	sparse    bool

	// sketch and prevSketch are used when groupBy is empty.
	// prevSketch is nil until the first rotate call.
	sketch     *hyperloglog.Sketch
	prevSketch *hyperloglog.Sketch

	// groupSize points to the estimator-wide counter of tracked groups.
	groupSize *atomic.Int64
	// groups and prevGroups are keyed by the group key and are used when groupBy is non-empty.
	groups     map[string]groupSketch
	prevGroups map[string]groupSketch

	// groupRejectedMu points to the estimator's mutex guarding groupRejectedSketch.
	groupRejectedMu *sync.Mutex
	// groupRejectedSketch is the pointer copied from estimator.groupRejectedSketch in
	// newEstimator; it receives hashes of group keys rejected because of groupLimit.
	groupRejectedSketch *hyperloglog.Sketch
}
|
||||
|
||||
func (eb *estimatorBucket) String() string {
|
||||
return fmt.Sprintf(
|
||||
"interval: %s; group_by: %v; extra_labels: %v", eb.interval, eb.groupBy, eb.extraLabels)
|
||||
}
|
||||
|
||||
func (eb *estimatorBucket) reset() {
|
||||
eb.mu.Lock()
|
||||
defer eb.mu.Unlock()
|
||||
|
||||
if len(eb.groupBy) == 0 {
|
||||
eb.prevSketch.Reset()
|
||||
eb.sketch.Reset()
|
||||
return
|
||||
}
|
||||
|
||||
eb.groups = make(map[string]groupSketch)
|
||||
eb.prevGroups = make(map[string]groupSketch)
|
||||
}
|
||||
|
||||
func (eb *estimatorBucket) rotate() {
|
||||
if len(eb.groupBy) == 0 {
|
||||
eb.mu.Lock()
|
||||
eb.prevSketch = eb.sketch
|
||||
eb.sketch = eb.newSketch()
|
||||
eb.mu.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
eb.mu.Lock()
|
||||
eb.prevGroups = eb.groups
|
||||
eb.groups = make(map[string]groupSketch, len(eb.groups))
|
||||
eb.mu.Unlock()
|
||||
|
||||
eb.groupSize.Add(int64(len(eb.prevGroups)))
|
||||
}
|
||||
|
||||
// insert registers ts in the bucket.
//
// Without grouping the series fingerprint is inserted into the bucket sketch and
// groupValuesKey and groupValues are ignored.
//
// With grouping the fingerprint is inserted into the sketch of the group identified by
// groupValuesKey; groupValues must hold the values of the eb.groupBy labels in the same
// order. A group seen in neither the current nor the previous window is created only
// while the shared groupSize is below groupLimit; otherwise the hash of its key goes
// to the rejected-groups sketch and ts is dropped.
//
// groupValuesKey may alias a buffer owned by the caller: it is cloned before being stored.
func (eb *estimatorBucket) insert(ts protoparser.TimeSerie, groupValuesKey string, groupValues []string) {
	eb.mu.Lock()
	defer eb.mu.Unlock()

	if len(eb.groupBy) == 0 {
		eb.sketch.InsertHash(ts.Fingerprint)
		return
	}

	gsk, ok := eb.groups[groupValuesKey]
	if !ok {
		// Groups present in the previous window are already accounted in groupSize
		// by rotate, so only brand-new groups are checked against the limit.
		if _, ok := eb.prevGroups[groupValuesKey]; !ok {
			groupSize := eb.groupSize.Load()
			if groupSize+1 > eb.groupLimit {
				eb.groupRejectedMu.Lock()
				eb.groupRejectedSketch.InsertHash(hash([]byte(groupValuesKey)))
				eb.groupRejectedMu.Unlock()
				return
			}

			eb.groupSize.Add(1)
		}

		// Pre-render the per-group tail of the sample line once per group:
		// "<key>",by_<label>="<value>",...} - it is appended after `group_by_values=`
		// by writeGroupMetrics.
		formatBuf := make([]byte, 0, 1024)
		formatBuf = strconv.AppendQuote(formatBuf, groupValuesKey)
		for i := range groupValues {
			formatBuf = append(formatBuf, ',')
			if eb.groupBy[i] == `__name__` {
				formatBuf = append(formatBuf, `by__name__`...)
			} else {
				formatBuf = append(formatBuf, `by_`...)
				formatBuf = append(formatBuf, eb.groupBy[i]...)
			}
			formatBuf = append(formatBuf, '=')
			formatBuf = strconv.AppendQuote(formatBuf, groupValues[i])
		}
		formatBuf = append(formatBuf, `} `...)

		gsk = groupSketch{
			// formatBuf is not modified after this point, so the unsafe conversion is valid.
			groupValueLabels: bytesutil.ToUnsafeString(formatBuf),

			Sketch: eb.newSketch(),
		}

		eb.groups[strings.Clone(groupValuesKey)] = gsk
	}
	gsk.InsertHash(ts.Fingerprint)
}
|
||||
|
||||
// writeNoGroupMetric merges the current and the previous sketches of the bucket into res.
//
// Despite the name it writes nothing by itself: the caller renders the sample
// from res after all the buckets have been merged.
func (eb *estimatorBucket) writeNoGroupMetric(res *hyperloglog.Sketch) {
	eb.mu.Lock()
	defer eb.mu.Unlock()

	eb.mergeSketches(eb.sketch, eb.prevSketch, res)
}
|
||||
|
||||
func (eb *estimatorBucket) writeGroupMetrics(w io.Writer, res *hyperloglog.Sketch, formatBuf []byte) []byte {
|
||||
eb.mu.Lock()
|
||||
defer eb.mu.Unlock()
|
||||
|
||||
prefixLen := len(formatBuf)
|
||||
|
||||
for valuesKey, gsk := range eb.groups {
|
||||
res.Reset()
|
||||
formatBuf = formatBuf[:prefixLen]
|
||||
|
||||
formatBuf = append(formatBuf, gsk.groupValueLabels...)
|
||||
|
||||
eb.mergeSketches(gsk.Sketch, eb.prevGroups[valuesKey].Sketch, res)
|
||||
formatBuf = strconv.AppendUint(formatBuf, res.Estimate(), 10)
|
||||
formatBuf = append(formatBuf, "\n"...)
|
||||
if _, err := w.Write(formatBuf); err != nil {
|
||||
logger.Errorf("writing metrics failed: %s; written cardinality metrics might be incomplete or invalid", err)
|
||||
}
|
||||
}
|
||||
|
||||
for valuesKey := range eb.prevGroups {
|
||||
if _, ok := eb.groups[valuesKey]; ok {
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
res.Reset()
|
||||
formatBuf = formatBuf[:prefixLen]
|
||||
|
||||
gsk := eb.prevGroups[valuesKey]
|
||||
formatBuf = append(formatBuf, gsk.groupValueLabels...)
|
||||
|
||||
eb.mergeSketches(nil, eb.prevGroups[valuesKey].Sketch, res)
|
||||
formatBuf = strconv.AppendUint(formatBuf, res.Estimate(), 10)
|
||||
formatBuf = append(formatBuf, "\n"...)
|
||||
if _, err := w.Write(formatBuf); err != nil {
|
||||
logger.Errorf("writing metrics failed: %s; written cardinality metrics might be incomplete or invalid", err)
|
||||
}
|
||||
}
|
||||
|
||||
return formatBuf[:prefixLen]
|
||||
}
|
||||
|
||||
func (eb *estimatorBucket) mergeSketches(cur, prev, res *hyperloglog.Sketch) {
|
||||
if err := res.Merge(cur); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if prev != nil {
|
||||
if err := res.Merge(prev); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// newSketch returns a new sketch created with the precision and sparse settings of the bucket.
func (eb *estimatorBucket) newSketch() *hyperloglog.Sketch {
	return mustNewSketch(eb.precision, eb.sparse)
}
|
||||
|
||||
// groupSketch is a sketch of a single group together with its pre-rendered labels.
type groupSketch struct {
	// groupValueLabels is the per-group tail of the sample line built in
	// estimatorBucket.insert; it ends with "} ", so only the value has to be appended.
	groupValueLabels string

	*hyperloglog.Sketch
}
|
||||
|
||||
// mustNewGroupRejectSketch returns a new sparse sketch with precision 10,
// which is used for estimating the number of rejected groups.
func mustNewGroupRejectSketch() *hyperloglog.Sketch {
	return mustNewSketch(10, true)
}
|
||||
|
||||
func mustNewSketch(precision uint8, sparse bool) *hyperloglog.Sketch {
|
||||
sk, err := hyperloglog.NewSketch(precision, sparse)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("cannot create HLL sketch with precision=%d and sparse=%v: %s", precision, sparse, err))
|
||||
}
|
||||
|
||||
return sk
|
||||
}
|
||||
|
||||
// hash returns the 64-bit metro hash of v calculated with the fixed seed 1337.
func hash(v []byte) uint64 {
	return metro.Hash64(v, 1337)
}
|
||||
274
app/vmestimator/estimator_timing_test.go
Normal file
274
app/vmestimator/estimator_timing_test.go
Normal file
@@ -0,0 +1,274 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmestimator/protoparser"
|
||||
)
|
||||
|
||||
func BenchmarkEstimator_WriteMetrics(b *testing.B) {
|
||||
b.Run("NoGroup/NoPrev", func(b *testing.B) {
|
||||
e, err := newEstimator(EstimatorConfig{Interval: time.Hour})
|
||||
if err != nil {
|
||||
b.Fatalf("newEstimator: %v", err)
|
||||
}
|
||||
defer e.stop()
|
||||
insertSeriesIntoEstimator(e, 5_000, 0)
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
e.writeMetrics(io.Discard)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("NoGroup/WithPrev", func(b *testing.B) {
|
||||
e, err := newEstimator(EstimatorConfig{Interval: time.Hour})
|
||||
if err != nil {
|
||||
b.Fatalf("newEstimator: %v", err)
|
||||
}
|
||||
defer e.stop()
|
||||
insertSeriesIntoEstimator(e, 5_000, 0)
|
||||
for _, eb := range e.buckets {
|
||||
eb.rotate()
|
||||
}
|
||||
insertSeriesIntoEstimator(e, 5_000, 0)
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
e.writeMetrics(io.Discard)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("Group100/NoPrev", func(b *testing.B) {
|
||||
e, err := newEstimator(EstimatorConfig{
|
||||
GroupBy: []string{"groupLabel"},
|
||||
Interval: time.Hour,
|
||||
})
|
||||
if err != nil {
|
||||
b.Fatalf("newEstimator: %v", err)
|
||||
}
|
||||
defer e.stop()
|
||||
insertSeriesIntoEstimator(e, 5_000, 100)
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
e.writeMetrics(io.Discard)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("Group100/WithPrev", func(b *testing.B) {
|
||||
e, err := newEstimator(EstimatorConfig{
|
||||
GroupBy: []string{"groupLabel"},
|
||||
Interval: time.Hour,
|
||||
})
|
||||
if err != nil {
|
||||
b.Fatalf("newEstimator: %v", err)
|
||||
}
|
||||
defer e.stop()
|
||||
insertSeriesIntoEstimator(e, 5_000, 100)
|
||||
for _, eb := range e.buckets {
|
||||
eb.rotate()
|
||||
}
|
||||
insertSeriesIntoEstimator(e, 5_000, 100)
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
e.writeMetrics(io.Discard)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("Group10k/NoPrev", func(b *testing.B) {
|
||||
e, err := newEstimator(EstimatorConfig{
|
||||
GroupBy: []string{"groupLabel"},
|
||||
Interval: time.Hour,
|
||||
})
|
||||
if err != nil {
|
||||
b.Fatalf("newEstimator: %v", err)
|
||||
}
|
||||
defer e.stop()
|
||||
insertSeriesIntoEstimator(e, 50_000, 10_000)
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
e.writeMetrics(io.Discard)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("Group10k/WithPrev", func(b *testing.B) {
|
||||
e, err := newEstimator(EstimatorConfig{
|
||||
GroupBy: []string{"groupLabel"},
|
||||
Interval: time.Hour,
|
||||
})
|
||||
if err != nil {
|
||||
b.Fatalf("newEstimator: %v", err)
|
||||
}
|
||||
defer e.stop()
|
||||
insertSeriesIntoEstimator(e, 50_000, 10_000)
|
||||
for _, eb := range e.buckets {
|
||||
eb.rotate()
|
||||
}
|
||||
insertSeriesIntoEstimator(e, 50_000, 10_000)
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
e.writeMetrics(io.Discard)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkEstimator_InsertManyParallel(b *testing.B) {
|
||||
b.Run("NoGroup", func(b *testing.B) {
|
||||
e, err := newEstimator(EstimatorConfig{Interval: time.Hour})
|
||||
if err != nil {
|
||||
b.Fatalf("newEstimator: %v", err)
|
||||
}
|
||||
defer e.stop()
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
var i uint64
|
||||
for pb.Next() {
|
||||
e.insertMany([]protoparser.TimeSerie{{Fingerprint: i}})
|
||||
i++
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
b.Run("Group100", func(b *testing.B) {
|
||||
e, err := newEstimator(EstimatorConfig{
|
||||
GroupBy: []string{"groupLabel"},
|
||||
Interval: time.Hour,
|
||||
})
|
||||
if err != nil {
|
||||
b.Fatalf("newEstimator: %v", err)
|
||||
}
|
||||
defer e.stop()
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
var i uint64
|
||||
for pb.Next() {
|
||||
e.insertMany([]protoparser.TimeSerie{{
|
||||
GroupLabels: []protoparser.Label{{Name: "groupLabel", Value: fmt.Sprintf("%d", i%100)}},
|
||||
Fingerprint: i,
|
||||
}})
|
||||
i++
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
b.Run("Group10k", func(b *testing.B) {
|
||||
e, err := newEstimator(EstimatorConfig{
|
||||
GroupBy: []string{"groupLabel"},
|
||||
Interval: time.Hour,
|
||||
})
|
||||
if err != nil {
|
||||
b.Fatalf("newEstimator: %v", err)
|
||||
}
|
||||
defer e.stop()
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
var i uint64
|
||||
for pb.Next() {
|
||||
e.insertMany([]protoparser.TimeSerie{{
|
||||
GroupLabels: []protoparser.Label{{Name: "groupLabel", Value: fmt.Sprintf("%d", i%10_000)}},
|
||||
Fingerprint: i,
|
||||
}})
|
||||
i++
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
b.Run("Group100k", func(b *testing.B) {
|
||||
e, err := newEstimator(EstimatorConfig{
|
||||
GroupBy: []string{"groupLabel"},
|
||||
Interval: time.Hour,
|
||||
})
|
||||
if err != nil {
|
||||
b.Fatalf("newEstimator: %v", err)
|
||||
}
|
||||
defer e.stop()
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
var i uint64
|
||||
for pb.Next() {
|
||||
e.insertMany([]protoparser.TimeSerie{{
|
||||
GroupLabels: []protoparser.Label{{Name: "groupLabel", Value: fmt.Sprintf("%d", i%100_000)}},
|
||||
Fingerprint: i,
|
||||
}})
|
||||
i++
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
// BenchmarkEstimator_InsertRotateCycle benchmarks the insert→rotate→insert cycle
|
||||
// for the global (no-group) estimator in two HLL regimes:
|
||||
// - Sparse: 1 000 series per interval (sketch stays in sparse mode)
|
||||
// - Normal: 30 000 series per interval (sketch converts to dense mode)
|
||||
func BenchmarkEstimator_InsertRotateCycle(b *testing.B) {
|
||||
b.Run("SparseHLL", func(b *testing.B) {
|
||||
e, err := newEstimator(EstimatorConfig{Interval: time.Hour})
|
||||
if err != nil {
|
||||
b.Fatalf("newEstimator: %v", err)
|
||||
}
|
||||
defer e.stop()
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
insertSeriesIntoEstimator(e, 1_000, 0)
|
||||
e.rotate()
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("NormalHLL", func(b *testing.B) {
|
||||
e, err := newEstimator(EstimatorConfig{Interval: time.Hour})
|
||||
if err != nil {
|
||||
b.Fatalf("newEstimator: %v", err)
|
||||
}
|
||||
defer e.stop()
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
insertSeriesIntoEstimator(e, 30_000, 0)
|
||||
e.rotate()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// insertSeriesIntoEstimator inserts numSeries time series into e.
|
||||
// When groupsNum > 0 each series gets a "groupLabel" cycling through groupsNum values.
|
||||
func insertSeriesIntoEstimator(e *estimator, numSeries, groupsNum int) {
|
||||
for i := 0; i < numSeries; i++ {
|
||||
var labels []protoparser.Label
|
||||
if groupsNum > 0 {
|
||||
labels = append(labels, protoparser.Label{
|
||||
Name: "groupLabel",
|
||||
Value: fmt.Sprintf("%d", i%groupsNum),
|
||||
})
|
||||
}
|
||||
e.insertMany([]protoparser.TimeSerie{
|
||||
{
|
||||
GroupLabels: labels,
|
||||
Fingerprint: hash([]byte(fmt.Sprintf("foobarbaz%d", i))),
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
595
app/vmestimator/etimator_test.go
Normal file
595
app/vmestimator/etimator_test.go
Normal file
@@ -0,0 +1,595 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmestimator/protoparser"
|
||||
)
|
||||
|
||||
func TestGlobalEstimate(t *testing.T) {
|
||||
genCard := func(cardinality int, seed string) func(e *estimator) {
|
||||
return func(e *estimator) {
|
||||
var tss []protoparser.TimeSerie
|
||||
fpBuf := make([]byte, 8, 8+len(seed))
|
||||
for i := 0; i < cardinality; i++ {
|
||||
binary.LittleEndian.PutUint64(fpBuf[:8], uint64(i))
|
||||
fpBuf = append(fpBuf, seed...)
|
||||
tss = append(tss, protoparser.TimeSerie{
|
||||
Fingerprint: hash(fpBuf[:]),
|
||||
})
|
||||
|
||||
if i%10 == 0 {
|
||||
e.insertMany(tss)
|
||||
tss = tss[:0]
|
||||
}
|
||||
}
|
||||
if len(tss) > 0 {
|
||||
e.insertMany(tss)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
f := func(gen func(e *estimator), expMetric string) {
|
||||
t.Helper()
|
||||
|
||||
cfg := EstimatorConfig{
|
||||
Interval: time.Minute * 10,
|
||||
Buckets: 5,
|
||||
}
|
||||
|
||||
e, err := newEstimator(cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create new estimator: %v", err)
|
||||
}
|
||||
defer e.stop()
|
||||
|
||||
gen(e)
|
||||
|
||||
if len(e.buckets) != cfg.Buckets {
|
||||
t.Fatalf("expected buckets length to be %d but got %d", cfg.Buckets, len(e.buckets))
|
||||
}
|
||||
for i, eb := range e.buckets {
|
||||
if len(eb.groupBy) > 0 {
|
||||
t.Fatalf("expected bucket %d groupBy length to be 0 but got %d", i, len(eb.groupBy))
|
||||
}
|
||||
if eb.groups != nil {
|
||||
t.Fatalf("expected bucket %d groups length to be 0 but got %d", i, len(eb.groups))
|
||||
}
|
||||
if eb.groupSize.Load() != 0 {
|
||||
t.Fatalf("expected bucket %d groupSize to be 0 but got %d", i, eb.groupSize.Load())
|
||||
}
|
||||
}
|
||||
|
||||
buf := bytes.NewBuffer(nil)
|
||||
e.writeMetrics(buf)
|
||||
|
||||
if strings.TrimSpace(buf.String()) != expMetric {
|
||||
t.Fatalf("\nexpected:\n%s\n\ngot:\n%s", expMetric, buf.String())
|
||||
}
|
||||
}
|
||||
|
||||
// no previous
|
||||
f(genCard(0, ""), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 0`)
|
||||
f(genCard(1, ""), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 1`)
|
||||
f(genCard(10, ""), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 10`)
|
||||
f(genCard(100, ""), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 100`)
|
||||
f(genCard(1000, ""), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 1000`)
|
||||
f(genCard(5000, ""), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 4998`)
|
||||
f(genCard(10000, ""), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 9920`)
|
||||
f(genCard(100000, ""), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 99658`)
|
||||
f(genCard(500000, ""), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 496552`)
|
||||
|
||||
// rotate once
|
||||
genRotateOnce := func(cardinality int) func(e *estimator) {
|
||||
return func(e *estimator) {
|
||||
genCard(cardinality, "")(e)
|
||||
e.rotate()
|
||||
}
|
||||
}
|
||||
f(genRotateOnce(0), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 0`)
|
||||
f(genRotateOnce(1), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 1`)
|
||||
f(genRotateOnce(10), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 10`)
|
||||
f(genRotateOnce(100), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 100`)
|
||||
f(genRotateOnce(1000), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 1000`)
|
||||
f(genRotateOnce(5000), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 4998`)
|
||||
f(genRotateOnce(10000), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 9920`)
|
||||
f(genRotateOnce(100000), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 99658`)
|
||||
f(genRotateOnce(500000), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 496552`)
|
||||
|
||||
// insert, rotate insert the same
|
||||
genInsertRotateInsertSameOnce := func(cardinality int) func(e *estimator) {
|
||||
return func(e *estimator) {
|
||||
genCard(cardinality/2, "")(e)
|
||||
e.rotate()
|
||||
genCard(cardinality/2, "")(e)
|
||||
}
|
||||
}
|
||||
f(genInsertRotateInsertSameOnce(0), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 0`)
|
||||
f(genInsertRotateInsertSameOnce(1), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 0`)
|
||||
f(genInsertRotateInsertSameOnce(10), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 5`)
|
||||
f(genInsertRotateInsertSameOnce(100), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 50`)
|
||||
f(genInsertRotateInsertSameOnce(1000), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 500`)
|
||||
f(genInsertRotateInsertSameOnce(5000), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 2499`)
|
||||
f(genInsertRotateInsertSameOnce(10000), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 4998`)
|
||||
f(genInsertRotateInsertSameOnce(100000), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 49529`)
|
||||
f(genInsertRotateInsertSameOnce(200000), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 99658`)
|
||||
|
||||
// insert, rotate insert
|
||||
genInsertRotateInsertOnce := func(cardinality int) func(e *estimator) {
|
||||
return func(e *estimator) {
|
||||
genCard(cardinality/2, "one")(e)
|
||||
e.rotate()
|
||||
genCard(cardinality/2, "two")(e)
|
||||
}
|
||||
}
|
||||
f(genInsertRotateInsertOnce(0), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 0`)
|
||||
f(genInsertRotateInsertOnce(1), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 0`)
|
||||
f(genInsertRotateInsertOnce(10), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 10`)
|
||||
f(genInsertRotateInsertOnce(100), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 100`)
|
||||
f(genInsertRotateInsertOnce(1000), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 1000`)
|
||||
f(genInsertRotateInsertOnce(5000), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 5000`)
|
||||
f(genInsertRotateInsertOnce(10000), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 10058`)
|
||||
f(genInsertRotateInsertOnce(100000), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 99543`)
|
||||
f(genInsertRotateInsertOnce(200000), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 198814`)
|
||||
|
||||
// insert, rotate insert
|
||||
genRotateTwoTimes := func(cardinality int) func(e *estimator) {
|
||||
return func(e *estimator) {
|
||||
genCard(cardinality, "")(e)
|
||||
e.rotate()
|
||||
e.rotate()
|
||||
}
|
||||
}
|
||||
f(genRotateTwoTimes(0), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 0`)
|
||||
f(genRotateTwoTimes(1), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 0`)
|
||||
f(genRotateTwoTimes(10), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 0`)
|
||||
f(genRotateTwoTimes(100), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 0`)
|
||||
f(genRotateTwoTimes(1000), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 0`)
|
||||
f(genRotateTwoTimes(5000), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 0`)
|
||||
f(genRotateTwoTimes(10000), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 0`)
|
||||
f(genRotateTwoTimes(100000), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 0`)
|
||||
f(genRotateTwoTimes(500000), `cardinality_estimate{interval="10m0s",group_by_keys="__global__"} 0`)
|
||||
}
|
||||
|
||||
func TestGroupEstimate(t *testing.T) {
|
||||
genCard := func(fooCard, barCard, bazCard int, seed string) func(e *estimator) {
|
||||
return func(e *estimator) {
|
||||
var tss []protoparser.TimeSerie
|
||||
for fooI := 0; fooI < max(1, fooCard); fooI++ {
|
||||
for barI := 0; barI < max(1, barCard); barI++ {
|
||||
for bazI := 0; bazI < max(1, bazCard); bazI++ {
|
||||
ts := protoparser.TimeSerie{}
|
||||
ts.GroupLabels = append(ts.GroupLabels, protoparser.Label{Name: "__name__", Value: "the_metric_name"})
|
||||
if fooCard > 0 {
|
||||
ts.GroupLabels = append(ts.GroupLabels, protoparser.Label{Name: "foo", Value: fmt.Sprintf("%s%d", seed, fooI)})
|
||||
}
|
||||
if barCard > 0 {
|
||||
ts.GroupLabels = append(ts.GroupLabels, protoparser.Label{Name: "bar", Value: fmt.Sprintf("%s%d", seed, barI)})
|
||||
}
|
||||
if bazCard > 0 {
|
||||
ts.GroupLabels = append(ts.GroupLabels, protoparser.Label{Name: "baz", Value: fmt.Sprintf("%s%d", seed, bazI)})
|
||||
}
|
||||
var fpBuf []byte
|
||||
for _, l := range ts.GroupLabels {
|
||||
fpBuf = append(fpBuf, l.Name...)
|
||||
fpBuf = append(fpBuf, '=')
|
||||
fpBuf = append(fpBuf, l.Value...)
|
||||
fpBuf = append(fpBuf, ',')
|
||||
}
|
||||
fpBuf = append(fpBuf, seed...)
|
||||
ts.Fingerprint = hash(fpBuf)
|
||||
tss = append(tss, ts)
|
||||
}
|
||||
}
|
||||
}
|
||||
e.insertMany(tss)
|
||||
}
|
||||
}
|
||||
|
||||
f := func(groupBy []string, gen func(e *estimator), expMetrics string) {
|
||||
t.Helper()
|
||||
|
||||
cfg := EstimatorConfig{
|
||||
Interval: time.Minute * 10,
|
||||
GroupBy: groupBy,
|
||||
Buckets: 5,
|
||||
}
|
||||
|
||||
e, err := newEstimator(cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create new estimator: %v", err)
|
||||
}
|
||||
defer e.stop()
|
||||
|
||||
gen(e)
|
||||
|
||||
if len(e.buckets) != cfg.Buckets {
|
||||
t.Fatalf("expected buckets length to be %d but got %d", cfg.Buckets, len(e.buckets))
|
||||
}
|
||||
for i, eb := range e.buckets {
|
||||
if eb.sketch != nil {
|
||||
t.Fatalf("expected bucket %d sketch to be nil", i)
|
||||
}
|
||||
if eb.prevSketch != nil {
|
||||
t.Fatalf("expected bucket %d prevSketch to be nil", i)
|
||||
}
|
||||
}
|
||||
|
||||
buf := bytes.NewBuffer(nil)
|
||||
e.writeMetrics(buf)
|
||||
|
||||
lines := strings.Split(strings.TrimSpace(buf.String()), "\n")
|
||||
sort.Strings(lines)
|
||||
actMetrics := "\n" + strings.Join(lines, "\n")
|
||||
|
||||
if expMetrics != actMetrics {
|
||||
t.Fatalf("\nexpected:\n%s\n\ngot:\n%s", expMetrics, actMetrics)
|
||||
}
|
||||
}
|
||||
|
||||
// group by metric name
|
||||
f([]string{"__name__"}, genCard(10, 10, 10, ""), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="__name__"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__name__",group_by_values="the_metric_name",by__name__="the_metric_name"} 1000`,
|
||||
)
|
||||
|
||||
// time series does not contribute to a group
|
||||
f([]string{"foo"}, genCard(0, 10, 10, ""), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 0`,
|
||||
)
|
||||
f([]string{"foo", "bar"}, genCard(0, 0, 10, ""), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo,bar"} 0`,
|
||||
)
|
||||
|
||||
// group by one label
|
||||
f([]string{"foo"}, genCard(1, 1, 0, ""), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="0",by_foo="0"} 1`,
|
||||
)
|
||||
f([]string{"foo"}, genCard(1, 2, 0, ""), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="0",by_foo="0"} 2`,
|
||||
)
|
||||
f([]string{"foo"}, genCard(1, 10, 0, ""), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="0",by_foo="0"} 10`,
|
||||
)
|
||||
f([]string{"foo"}, genCard(1, 100, 0, ""), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="0",by_foo="0"} 100`,
|
||||
)
|
||||
f([]string{"foo"}, genCard(1, 1000, 0, ""), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="0",by_foo="0"} 1000`,
|
||||
)
|
||||
f([]string{"foo"}, genCard(1, 10000, 0, ""), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="0",by_foo="0"} 9957`,
|
||||
)
|
||||
f([]string{"foo"}, genCard(1, 50000, 0, ""), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="0",by_foo="0"} 50387`,
|
||||
)
|
||||
f([]string{"foo"}, genCard(1, 1, 1, ""), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="0",by_foo="0"} 1`,
|
||||
)
|
||||
f([]string{"foo"}, genCard(1, 2, 2, ""), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="0",by_foo="0"} 4`,
|
||||
)
|
||||
f([]string{"foo"}, genCard(1, 10, 10, ""), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="0",by_foo="0"} 100`,
|
||||
)
|
||||
f([]string{"foo"}, genCard(1, 100, 100, ""), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="0",by_foo="0"} 9954`,
|
||||
)
|
||||
f([]string{"foo"}, genCard(1, 1000, 1000, ""), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="0",by_foo="0"} 1013124`,
|
||||
)
|
||||
|
||||
// group by one label, rotate
|
||||
genCardRotate := func(fooCard, barCard, bazCard int, seed string) func(e *estimator) {
|
||||
return func(e *estimator) {
|
||||
genCard(fooCard, barCard, bazCard, seed)(e)
|
||||
e.rotate()
|
||||
}
|
||||
}
|
||||
f([]string{"foo"}, genCardRotate(1, 10, 10, ""), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="0",by_foo="0"} 100`,
|
||||
)
|
||||
f([]string{"foo"}, genCardRotate(1, 1000, 1000, ""), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="0",by_foo="0"} 1013124`,
|
||||
)
|
||||
|
||||
// group by one label, rotate, insert same
|
||||
genCardRotateInsertSame := func(barCard, bazCard int) func(e *estimator) {
|
||||
return func(e *estimator) {
|
||||
genCard(1, barCard, bazCard, "")(e)
|
||||
e.rotate()
|
||||
genCard(1, barCard, bazCard, "")(e)
|
||||
}
|
||||
}
|
||||
f([]string{"foo"}, genCardRotateInsertSame(10, 10), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="0",by_foo="0"} 100`,
|
||||
)
|
||||
f([]string{"foo"}, genCardRotateInsertSame(1000, 1000), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="0",by_foo="0"} 1013124`,
|
||||
)
|
||||
|
||||
// group by one label, rotate, insert diff
|
||||
genCardRotateInsertDiff := func(barCard, bazCard int) func(e *estimator) {
|
||||
return func(e *estimator) {
|
||||
genCard(1, barCard, bazCard, "one")(e)
|
||||
e.rotate()
|
||||
genCard(1, barCard, bazCard, "two")(e)
|
||||
}
|
||||
}
|
||||
f([]string{"foo"}, genCardRotateInsertDiff(10, 10), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 2
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="one0",by_foo="one0"} 100
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="two0",by_foo="two0"} 100`,
|
||||
)
|
||||
f([]string{"foo"}, genCardRotateInsertDiff(1000, 1000), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 2
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="one0",by_foo="one0"} 995153
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="two0",by_foo="two0"} 992158`,
|
||||
)
|
||||
|
||||
// group by one label, rotate, insert diff
|
||||
genCardRotateTwice := func(barCard, bazCard int) func(e *estimator) {
|
||||
return func(e *estimator) {
|
||||
genCard(1, barCard, bazCard, "one")(e)
|
||||
e.rotate()
|
||||
e.rotate()
|
||||
}
|
||||
}
|
||||
f([]string{"foo"}, genCardRotateTwice(10, 10), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 0`,
|
||||
)
|
||||
f([]string{"foo"}, genCardRotateTwice(1000, 1000), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 0`,
|
||||
)
|
||||
|
||||
// group by two labels
|
||||
f([]string{"foo", "bar"}, genCard(1, 1, 1000, ""), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo,bar"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="0,0",by_foo="0",by_bar="0"} 1000`,
|
||||
)
|
||||
f([]string{"foo", "bar"}, genCard(2, 1, 1000, ""), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo,bar"} 2
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="0,0",by_foo="0",by_bar="0"} 1000
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="1,0",by_foo="1",by_bar="0"} 1000`,
|
||||
)
|
||||
f([]string{"foo", "bar"}, genCard(2, 2, 1000, ""), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo,bar"} 4
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="0,0",by_foo="0",by_bar="0"} 1000
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="0,1",by_foo="0",by_bar="1"} 1000
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="1,0",by_foo="1",by_bar="0"} 1000
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="1,1",by_foo="1",by_bar="1"} 1000`,
|
||||
)
|
||||
|
||||
// group by two labels, rotate
|
||||
genCardTwoLabelsRotate := func() func(e *estimator) {
|
||||
return func(e *estimator) {
|
||||
genCard(2, 2, 1000, "")(e)
|
||||
e.rotate()
|
||||
}
|
||||
}
|
||||
f([]string{"foo", "bar"}, genCardTwoLabelsRotate(), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo,bar"} 4
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="0,0",by_foo="0",by_bar="0"} 1000
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="0,1",by_foo="0",by_bar="1"} 1000
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="1,0",by_foo="1",by_bar="0"} 1000
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="1,1",by_foo="1",by_bar="1"} 1000`,
|
||||
)
|
||||
|
||||
// group by two labels, rotate, insert same
|
||||
genCardTwoLabelsRotateInsertSame := func() func(e *estimator) {
|
||||
return func(e *estimator) {
|
||||
genCard(2, 2, 1000, "")(e)
|
||||
e.rotate()
|
||||
genCard(2, 2, 1000, "")(e)
|
||||
}
|
||||
}
|
||||
f([]string{"foo", "bar"}, genCardTwoLabelsRotateInsertSame(), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo,bar"} 4
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="0,0",by_foo="0",by_bar="0"} 1000
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="0,1",by_foo="0",by_bar="1"} 1000
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="1,0",by_foo="1",by_bar="0"} 1000
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="1,1",by_foo="1",by_bar="1"} 1000`,
|
||||
)
|
||||
|
||||
// group by two labels, rotate, insert diff
|
||||
genCardTwoLabelsRotateInsertDiff := func() func(e *estimator) {
|
||||
return func(e *estimator) {
|
||||
genCard(2, 2, 1000, "one")(e)
|
||||
e.rotate()
|
||||
genCard(2, 2, 1000, "two")(e)
|
||||
}
|
||||
}
|
||||
f([]string{"foo", "bar"}, genCardTwoLabelsRotateInsertDiff(), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo,bar"} 8
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="one0,one0",by_foo="one0",by_bar="one0"} 1000
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="one0,one1",by_foo="one0",by_bar="one1"} 1000
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="one1,one0",by_foo="one1",by_bar="one0"} 1000
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="one1,one1",by_foo="one1",by_bar="one1"} 1000
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="two0,two0",by_foo="two0",by_bar="two0"} 1000
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="two0,two1",by_foo="two0",by_bar="two1"} 1000
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="two1,two0",by_foo="two1",by_bar="two0"} 1000
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo,bar",group_by_values="two1,two1",by_foo="two1",by_bar="two1"} 1000`,
|
||||
)
|
||||
|
||||
// group by two labels, rotate, insert diff
|
||||
genCardTwoLabelsRotateTwice := func() func(e *estimator) {
|
||||
return func(e *estimator) {
|
||||
genCard(2, 2, 1000, "one")(e)
|
||||
e.rotate()
|
||||
e.rotate()
|
||||
}
|
||||
}
|
||||
f([]string{"foo", "bar"}, genCardTwoLabelsRotateTwice(), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo,bar"} 0`,
|
||||
)
|
||||
|
||||
// quote values: label values with special characters must be properly escaped
|
||||
genSpecialCard := func(fooVal string) func(e *estimator) {
|
||||
return func(e *estimator) {
|
||||
e.insertMany([]protoparser.TimeSerie{
|
||||
{
|
||||
GroupLabels: []protoparser.Label{{Name: "foo", Value: fooVal}},
|
||||
Fingerprint: hash([]byte("foo=" + fooVal + ",")),
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// double quote in value
|
||||
f([]string{"foo"}, genSpecialCard(`a"b`), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="a\"b",by_foo="a\"b"} 1`,
|
||||
)
|
||||
|
||||
f([]string{"foo"}, genSpecialCard(`a\b`), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="a\\b",by_foo="a\\b"} 1`,
|
||||
)
|
||||
|
||||
f([]string{"foo"}, genSpecialCard("a\nb"), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="a\nb",by_foo="a\nb"} 1`,
|
||||
)
|
||||
|
||||
f([]string{"foo"}, genSpecialCard("a\tb"), `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="a\tb",by_foo="a\tb"} 1`,
|
||||
)
|
||||
}
|
||||
|
||||
func TestGroupEstimateGroupLimit(t *testing.T) {
|
||||
makeTS := func(fooVal string) protoparser.TimeSerie {
|
||||
return protoparser.TimeSerie{
|
||||
GroupLabels: []protoparser.Label{{Name: "foo", Value: fooVal}},
|
||||
Fingerprint: hash([]byte("foo=" + fooVal + ",")),
|
||||
}
|
||||
}
|
||||
|
||||
f := func(groupLimit int, gen func(e *estimator), expRejected int, expMetrics string) {
|
||||
t.Helper()
|
||||
|
||||
cfg := EstimatorConfig{
|
||||
Interval: time.Minute * 10,
|
||||
GroupBy: []string{"foo"},
|
||||
GroupLimit: groupLimit,
|
||||
Buckets: 3,
|
||||
}
|
||||
|
||||
e, err := newEstimator(cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create new estimator: %v", err)
|
||||
}
|
||||
defer e.stop()
|
||||
|
||||
gen(e)
|
||||
|
||||
buf := bytes.NewBuffer(nil)
|
||||
e.writeMetrics(buf)
|
||||
|
||||
lines := strings.Split(strings.TrimSpace(buf.String()), "\n")
|
||||
sort.Strings(lines)
|
||||
actMetrics := "\n" + strings.Join(lines, "\n")
|
||||
|
||||
if expMetrics != actMetrics {
|
||||
t.Fatalf("\nexpected:\n%s\n\ngot:\n%s", expMetrics, actMetrics)
|
||||
}
|
||||
|
||||
var actRejected int
|
||||
if e.buckets[0].groupRejectedSketch != nil {
|
||||
actRejected = int(e.buckets[0].groupRejectedSketch.Estimate())
|
||||
}
|
||||
if expRejected != actRejected {
|
||||
t.Fatalf("rejected expected: %d; got: %d", expRejected, actRejected)
|
||||
}
|
||||
}
|
||||
|
||||
// all groups accepted
|
||||
f(3, func(e *estimator) {
|
||||
e.insertMany([]protoparser.TimeSerie{makeTS("a"), makeTS("b"), makeTS("c")})
|
||||
}, 0, `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 3
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="a",by_foo="a"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="b",by_foo="b"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="c",by_foo="c"} 1`,
|
||||
)
|
||||
|
||||
// 2 groups only accepted
|
||||
f(2, func(e *estimator) {
|
||||
e.insertMany([]protoparser.TimeSerie{makeTS("a"), makeTS("b"), makeTS("c")})
|
||||
}, 1, `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 3
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="a",by_foo="a"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="b",by_foo="b"} 1`,
|
||||
)
|
||||
|
||||
// one group only accepted
|
||||
f(1, func(e *estimator) {
|
||||
e.insertMany([]protoparser.TimeSerie{makeTS("a"), makeTS("b"), makeTS("c")})
|
||||
}, 2, `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 3
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="a",by_foo="a"} 1`,
|
||||
)
|
||||
|
||||
// after rotate: groups in prevGroups bypass the limit; new groups are still checked
|
||||
f(2, func(e *estimator) {
|
||||
// fills limit
|
||||
e.insertMany([]protoparser.TimeSerie{makeTS("a"), makeTS("b")})
|
||||
e.rotate()
|
||||
// "a" bypasses, "c" rejected
|
||||
e.insertMany([]protoparser.TimeSerie{makeTS("a"), makeTS("c")})
|
||||
}, 1, `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 3
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="a",by_foo="a"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="b",by_foo="b"} 1`,
|
||||
)
|
||||
|
||||
// after rotate: new group accepted when remaining capacity allows
|
||||
f(3, func(e *estimator) {
|
||||
// 2 groups, limit=3
|
||||
e.insertMany([]protoparser.TimeSerie{makeTS("a"), makeTS("b")})
|
||||
e.rotate()
|
||||
// "a" bypasses, "c" accepted (2+1=3 <= 3)
|
||||
e.insertMany([]protoparser.TimeSerie{makeTS("a"), makeTS("c")})
|
||||
}, 0, `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 3
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="a",by_foo="a"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="b",by_foo="b"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="c",by_foo="c"} 1`,
|
||||
)
|
||||
|
||||
// reject 100
|
||||
f(3, func(e *estimator) {
|
||||
var tss []protoparser.TimeSerie
|
||||
for i := 0; i < 103; i++ {
|
||||
tss = append(tss, makeTS(fmt.Sprintf("a%d", i)))
|
||||
}
|
||||
e.insertMany(tss)
|
||||
}, 100, `
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="__group__",group_by_values="foo"} 103
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="a0",by_foo="a0"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="a1",by_foo="a1"} 1
|
||||
cardinality_estimate{interval="10m0s",group_by_keys="foo",group_by_values="a2",by_foo="a2"} 1`,
|
||||
)
|
||||
}
|
||||
123
app/vmestimator/main.go
Normal file
123
app/vmestimator/main.go
Normal file
@@ -0,0 +1,123 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmestimator/protoparser"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
httpListenAddrs = flagutil.NewArrayString("httpListenAddr", "TCP address to listen for incoming HTTP requests")
|
||||
configPath = flag.String("config", "config.yaml", "Path to YAML configuration file")
|
||||
|
||||
prometheusWriteRequests = metrics.NewCounter(`vmestimator_http_requests_total{path="/api/v1/write", protocol="promremotewrite"}`)
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.CommandLine.SetOutput(os.Stdout)
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
|
||||
cfg, err := loadConfig(*configPath)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot load config: %v", err)
|
||||
}
|
||||
|
||||
estimators := make([]*estimator, 0, len(cfg.Streams))
|
||||
for _, ec := range cfg.Streams {
|
||||
e, err := newEstimator(ec)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot create estimator: %v", err)
|
||||
}
|
||||
estimators = append(estimators, e)
|
||||
}
|
||||
|
||||
if *cardinalityMetricsExposeAt == `/metrics` {
|
||||
metrics.RegisterMetricsWriter(func(w io.Writer) {
|
||||
writeCardinalityMetrics(w, estimators)
|
||||
})
|
||||
}
|
||||
|
||||
groupLabelsMap := make(map[string]struct{})
|
||||
for _, e := range estimators {
|
||||
for _, l := range e.groupBy {
|
||||
groupLabelsMap[l] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
groupLabels := make([]string, 0, len(groupLabelsMap))
|
||||
for k := range groupLabelsMap {
|
||||
groupLabels = append(groupLabels, k)
|
||||
}
|
||||
|
||||
listenAddrs := *httpListenAddrs
|
||||
if len(listenAddrs) == 0 {
|
||||
listenAddrs = []string{":8490"}
|
||||
}
|
||||
|
||||
logger.Infof("starting vmestimator at %q", listenAddrs)
|
||||
startTime := time.Now()
|
||||
|
||||
go httpserver.Serve(listenAddrs, func(w http.ResponseWriter, r *http.Request) bool {
|
||||
cmPath := *cardinalityMetricsExposeAt
|
||||
if cmPath != "/metrics" && cmPath != "" && r.URL.Path == cmPath {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
writeCardinalityMetrics(w, estimators)
|
||||
return true
|
||||
}
|
||||
|
||||
path, _ := strings.CutPrefix(r.URL.Path, `/cardinality`)
|
||||
switch path {
|
||||
case "/api/v1/write":
|
||||
prometheusWriteRequests.Inc()
|
||||
err := protoparser.Parse(r.Body, groupLabels, func(tss []protoparser.TimeSerie) {
|
||||
for _, e := range estimators {
|
||||
e.insertMany(tss)
|
||||
}
|
||||
})
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "error parsing remote write request: %s", err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
case "/reset":
|
||||
for _, e := range estimators {
|
||||
e.reset()
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}, httpserver.ServeOptions{})
|
||||
|
||||
logger.Infof("started vmestimator in %.3f seconds", time.Since(startTime).Seconds())
|
||||
|
||||
pushmetrics.Init()
|
||||
sig := procutil.WaitForSigterm()
|
||||
logger.Infof("received signal %s", sig)
|
||||
pushmetrics.Stop()
|
||||
|
||||
logger.Infof("gracefully shutting down webservice at %q", listenAddrs)
|
||||
if err := httpserver.Stop(listenAddrs); err != nil {
|
||||
logger.Errorf("cannot stop http server: %s", err)
|
||||
}
|
||||
for _, e := range estimators {
|
||||
e.stop()
|
||||
}
|
||||
logger.Infof("shutting down vmestimator")
|
||||
}
|
||||
78
app/vmestimator/protoparser/streamparser.go
Normal file
78
app/vmestimator/protoparser/streamparser.go
Normal file
@@ -0,0 +1,78 @@
|
||||
package protoparser
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/snappy"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
// maxInsertRequestSize is the -maxInsertRequestSize flag (32 MiB by default).
//
// It is passed to the request reader and to the decompressors as a size limit,
// and the unpacked request body is checked against it as well.
var maxInsertRequestSize = flagutil.NewBytes("maxInsertRequestSize", 32*1024*1024, "The maximum size in bytes of a single Prometheus remote_write API request")
|
||||
|
||||
// Parse parses Prometheus remote_write message from reader and calls callback for the parsed timeseries.
|
||||
//
|
||||
// callback shouldn't hold tss after returning.
|
||||
func Parse(r io.Reader, groupLabels []string, callback func(tss []TimeSerie)) error {
|
||||
startTime := fasttime.UnixTimestamp()
|
||||
|
||||
readCalls.Inc()
|
||||
err := protoparserutil.ReadUncompressedData(r, "", maxInsertRequestSize, func(data []byte) error {
|
||||
return parseRequestBody(data, groupLabels, callback)
|
||||
})
|
||||
if err != nil {
|
||||
readErrors.Inc()
|
||||
return fmt.Errorf("cannot read prometheus remote_write data from client in %d seconds: %w", fasttime.UnixTimestamp()-startTime, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func parseRequestBody(data []byte, groupLabels []string, callback func(tss []TimeSerie)) error {
|
||||
// Synchronously process the request in order to properly return errors to Parse caller,
|
||||
// so it could properly return HTTP 503 status code in response.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/896
|
||||
bb := bodyBufferPool.Get()
|
||||
defer bodyBufferPool.Put(bb)
|
||||
|
||||
if encoding.IsZstd(data) {
|
||||
var err error
|
||||
bb.B, err = encoding.DecompressZSTDLimited(bb.B[:0], data, maxInsertRequestSize.IntN())
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot decompress zstd-encoded request with length %d: %w", len(data), err)
|
||||
}
|
||||
} else {
|
||||
var err error
|
||||
bb.B, err = snappy.Decode(bb.B, data, maxInsertRequestSize.IntN())
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot decompress snappy-encoded request with length %d: %w", len(data), err)
|
||||
}
|
||||
}
|
||||
if int64(len(bb.B)) > maxInsertRequestSize.N {
|
||||
return fmt.Errorf("too big unpacked request; mustn't exceed `-maxInsertRequestSize=%d` bytes; got %d bytes", maxInsertRequestSize.N, len(bb.B))
|
||||
}
|
||||
wru := getWriteRequestUnmarshaler()
|
||||
defer putWriteRequestUnmarshaler(wru)
|
||||
if err := wru.UnmarshalProtobuf(bb.B, groupLabels, func(tss []TimeSerie) {
|
||||
rowsRead.Add(len(tss))
|
||||
callback(tss)
|
||||
}); err != nil {
|
||||
unmarshalErrors.Inc()
|
||||
return fmt.Errorf("cannot unmarshal prompb.WriteRequest with size %d bytes: %w", len(bb.B), err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
var bodyBufferPool bytesutil.ByteBufferPool
|
||||
|
||||
var (
|
||||
readCalls = metrics.NewCounter(`vm_protoparser_read_calls_total{type="promremotewrite"}`)
|
||||
readErrors = metrics.NewCounter(`vm_protoparser_read_errors_total{type="promremotewrite"}`)
|
||||
rowsRead = metrics.NewCounter(`vm_protoparser_rows_read_total{type="promremotewrite"}`)
|
||||
unmarshalErrors = metrics.NewCounter(`vm_protoparser_unmarshal_errors_total{type="promremotewrite"}`)
|
||||
)
|
||||
67
app/vmestimator/protoparser/streamparser_timing_test.go
Normal file
67
app/vmestimator/protoparser/streamparser_timing_test.go
Normal file
@@ -0,0 +1,67 @@
|
||||
package protoparser
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
func BenchmarkParse(b *testing.B) {
|
||||
data := buildSnappyEncodedWriteRequest(5000, 20, 20, 3)
|
||||
groupLabels := []string{
|
||||
"foo",
|
||||
"bar",
|
||||
"baz",
|
||||
"__name__",
|
||||
"job",
|
||||
"groupLabel",
|
||||
}
|
||||
|
||||
var cnt int
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
b.SetBytes(int64(len(data)))
|
||||
for b.Loop() {
|
||||
err := Parse(bytes.NewReader(data), groupLabels, func(tss []TimeSerie) {
|
||||
cnt += len(tss)
|
||||
})
|
||||
if err != nil {
|
||||
b.Fatalf("stream.Parse: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// buildSnappyEncodedWriteRequest builds a snappy-encoded protobuf WriteRequest
|
||||
// with numSeries time series, each having numLabels labels of labelSize bytes each.
|
||||
func buildSnappyEncodedWriteRequest(numSeries, numLabels, labelSize, groupsNum int) []byte {
|
||||
labelValue := strings.Repeat("x", labelSize)
|
||||
|
||||
tss := make([]prompb.TimeSeries, numSeries)
|
||||
for i := range tss {
|
||||
labels := make([]prompb.Label, numLabels)
|
||||
for j := range labels {
|
||||
labels[j] = prompb.Label{
|
||||
Name: fmt.Sprintf("label%02d", j),
|
||||
Value: fmt.Sprintf("val%05d_%s", i, labelValue),
|
||||
}
|
||||
}
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: "groupLabel",
|
||||
Value: fmt.Sprintf("%d", i%groupsNum),
|
||||
})
|
||||
|
||||
tss[i] = prompb.TimeSeries{
|
||||
Labels: labels,
|
||||
Samples: []prompb.Sample{{Value: 1, Timestamp: 1000}},
|
||||
}
|
||||
}
|
||||
|
||||
wr := &prompb.WriteRequest{Timeseries: tss}
|
||||
pbData := wr.MarshalProtobuf(nil)
|
||||
return snappy.Encode(nil, pbData)
|
||||
}
|
||||
170
app/vmestimator/protoparser/write_request.go
Normal file
170
app/vmestimator/protoparser/write_request.go
Normal file
@@ -0,0 +1,170 @@
|
||||
package protoparser
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"slices"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/easyproto"
|
||||
"github.com/cespare/xxhash/v2"
|
||||
)
|
||||
|
||||
type (
	// TimeSerie is a time series parsed from a remote_write request.
	//
	// Samples aren't stored, since only the series identity is needed.
	TimeSerie struct {
		// GroupLabels contains the series labels with names listed in groupLabels passed to the parser.
		GroupLabels []Label

		// Fingerprint is the hash over the encoded labels of the series.
		Fingerprint uint64
	}

	// Label is a name/value pair of a time series label.
	Label struct {
		Name, Value string
	}
)
|
||||
|
||||
func getWriteRequestUnmarshaler() *writeRequestUnmarshaler {
|
||||
v := wruPool.Get()
|
||||
if v == nil {
|
||||
return &writeRequestUnmarshaler{
|
||||
tss: make([]TimeSerie, 0, 1024),
|
||||
labelsPool: make([]Label, 0, 4096),
|
||||
d: xxhash.New(),
|
||||
}
|
||||
}
|
||||
return v.(*writeRequestUnmarshaler)
|
||||
}
|
||||
|
||||
func putWriteRequestUnmarshaler(wru *writeRequestUnmarshaler) {
|
||||
wru.Reset()
|
||||
wruPool.Put(wru)
|
||||
}
|
||||
|
||||
var wruPool sync.Pool
|
||||
|
||||
// WriteRequestUnmarshaler is reusable unmarshaler for WriteRequest protobuf messages.
|
||||
//
|
||||
// It maintains internal pools for labels and samples to reduce memory allocations.
|
||||
// See UnmarshalProtobuf for details on how to use it.
|
||||
type writeRequestUnmarshaler struct {
|
||||
tss []TimeSerie
|
||||
labelsPool []Label
|
||||
d *xxhash.Digest
|
||||
}
|
||||
|
||||
// Reset resets wru, so it could be re-used.
|
||||
func (wru *writeRequestUnmarshaler) Reset() {
|
||||
wru.tss = wru.tss[:0]
|
||||
wru.labelsPool = wru.labelsPool[:0]
|
||||
wru.d.Reset()
|
||||
}
|
||||
|
||||
func (wru *writeRequestUnmarshaler) UnmarshalProtobuf(src []byte, groupLabels []string, callback func(tss []TimeSerie)) error {
|
||||
wru.Reset()
|
||||
|
||||
var err error
|
||||
|
||||
tss := wru.tss
|
||||
|
||||
// message WriteRequest {
|
||||
// repeated TimeSeries timeseries = 1;
|
||||
// reserved 2;
|
||||
// repeated Metadata metadata = 3;
|
||||
// }
|
||||
labelsPool := wru.labelsPool
|
||||
var fc easyproto.FieldContext
|
||||
for len(src) > 0 {
|
||||
if len(tss) >= cap(tss) {
|
||||
callback(tss)
|
||||
tss = tss[:0]
|
||||
labelsPool = labelsPool[:0]
|
||||
}
|
||||
|
||||
src, err = fc.NextField(src)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot read the next field: %w", err)
|
||||
}
|
||||
switch fc.FieldNum {
|
||||
case 1:
|
||||
data, ok := fc.MessageData()
|
||||
if !ok {
|
||||
return fmt.Errorf("cannot read timeseries data")
|
||||
}
|
||||
tss = tss[:len(tss)+1]
|
||||
ts := &tss[len(tss)-1]
|
||||
d := wru.d
|
||||
d.Reset()
|
||||
labelsPool, err = ts.unmarshalProtobuf(data, groupLabels, labelsPool, d)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot unmarshal timeseries: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(tss) > 0 {
|
||||
callback(tss)
|
||||
tss = tss[:0]
|
||||
labelsPool = labelsPool[:0]
|
||||
}
|
||||
|
||||
wru.tss = tss[:0]
|
||||
wru.labelsPool = labelsPool
|
||||
wru.d.Reset()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ts *TimeSerie) unmarshalProtobuf(src []byte, groupLabels []string, labelsPool []Label, d *xxhash.Digest) ([]Label, error) {
|
||||
// message TimeSeries {
|
||||
// repeated Label labels = 1;
|
||||
// repeated Sample samples = 2;
|
||||
// }
|
||||
|
||||
labelsPoolLen := len(labelsPool)
|
||||
var fc easyproto.FieldContext
|
||||
var lfc easyproto.FieldContext
|
||||
for len(src) > 0 {
|
||||
var err error
|
||||
src, err = fc.NextField(src)
|
||||
if err != nil {
|
||||
return labelsPool, fmt.Errorf("cannot read the next field: %w", err)
|
||||
}
|
||||
switch fc.FieldNum {
|
||||
case 1:
|
||||
data, ok := fc.MessageData()
|
||||
if !ok {
|
||||
return labelsPool, fmt.Errorf("cannot read label data")
|
||||
}
|
||||
|
||||
var nameBytes, valueBytes []byte
|
||||
ldata := data
|
||||
for len(ldata) > 0 {
|
||||
ldata, err = lfc.NextField(ldata)
|
||||
if err != nil {
|
||||
return labelsPool, fmt.Errorf("cannot read label field: %w", err)
|
||||
}
|
||||
switch lfc.FieldNum {
|
||||
case 1:
|
||||
nameBytes, ok = lfc.Bytes()
|
||||
if !ok {
|
||||
return labelsPool, fmt.Errorf("cannot read label name")
|
||||
}
|
||||
case 2:
|
||||
valueBytes, ok = lfc.Bytes()
|
||||
if !ok {
|
||||
return labelsPool, fmt.Errorf("cannot read label value")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_, _ = d.Write(data)
|
||||
|
||||
name := bytesutil.ToUnsafeString(nameBytes)
|
||||
if slices.Contains(groupLabels, name) {
|
||||
labelsPool = append(labelsPool, Label{
|
||||
Name: name,
|
||||
Value: bytesutil.ToUnsafeString(valueBytes),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
ts.GroupLabels = labelsPool[labelsPoolLen:]
|
||||
ts.Fingerprint = d.Sum64()
|
||||
return labelsPool, nil
|
||||
}
|
||||
86
app/vmestimator/protoparser/write_request_timing_test.go
Normal file
86
app/vmestimator/protoparser/write_request_timing_test.go
Normal file
@@ -0,0 +1,86 @@
|
||||
package protoparser
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
)
|
||||
|
||||
func BenchmarkWriteRequest_UnmarshalProtobuf(b *testing.B) {
|
||||
var data = make([]byte, 0, 21_000_000)
|
||||
|
||||
f := func(rows, labels, labelSize, groupBy int) {
|
||||
bName := fmt.Sprintf("Rows=%d/Labels=%d/LabelSize=%d/GroupBy=%d", rows, labels, labelSize, groupBy)
|
||||
b.Run(bName, func(b *testing.B) {
|
||||
data := buildEncodedWriteRequest(data, rows, labels, labelSize, groupBy)
|
||||
groupLabels := []string{
|
||||
"foo",
|
||||
"bar",
|
||||
"baz",
|
||||
"__name__",
|
||||
"job",
|
||||
"groupLabel",
|
||||
}
|
||||
|
||||
wru := getWriteRequestUnmarshaler()
|
||||
cnt := 0
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
b.SetBytes(int64(len(data)))
|
||||
for b.Loop() {
|
||||
wru.Reset()
|
||||
if err := wru.UnmarshalProtobuf(data, groupLabels, func(tss []TimeSerie) {
|
||||
cnt += len(tss)
|
||||
}); err != nil {
|
||||
b.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
f(5_000, 0, 0, 3)
|
||||
f(5_000, 1, 20, 3)
|
||||
|
||||
f(1_000, 20, 20, 3)
|
||||
f(5_000, 20, 20, 3)
|
||||
f(10_000, 20, 20, 3)
|
||||
f(20_000, 20, 20, 3)
|
||||
|
||||
// long label values
|
||||
f(1_000, 20, 2000, 3)
|
||||
|
||||
// many labels
|
||||
f(1_000, 2000, 100, 3)
|
||||
}
|
||||
|
||||
// buildEncodedWriteRequest builds a snappy-encoded protobuf WriteRequest
|
||||
// with numSeries time series, each having numLabels labels of labelSize bytes each.
|
||||
func buildEncodedWriteRequest(dst []byte, numSeries, numLabels, labelSize, groupsNum int) []byte {
|
||||
labelValue := strings.Repeat("x", labelSize)
|
||||
|
||||
tss := make([]prompb.TimeSeries, numSeries)
|
||||
for i := range tss {
|
||||
labels := make([]prompb.Label, numLabels)
|
||||
for j := range labels {
|
||||
labels[j] = prompb.Label{
|
||||
Name: fmt.Sprintf("label%02d", j),
|
||||
Value: fmt.Sprintf("val%05d_%s", i, labelValue),
|
||||
}
|
||||
}
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: "groupLabel",
|
||||
Value: fmt.Sprintf("%d", i%groupsNum),
|
||||
})
|
||||
|
||||
tss[i] = prompb.TimeSeries{
|
||||
Labels: labels,
|
||||
Samples: []prompb.Sample{{Value: 1, Timestamp: 1000}},
|
||||
}
|
||||
}
|
||||
|
||||
wr := &prompb.WriteRequest{Timeseries: tss}
|
||||
return wr.MarshalProtobuf(dst[:0])
|
||||
}
|
||||
@@ -34,7 +34,9 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
|
||||
* BUGFIX: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): fix issue with producing aggregated samples with identical timestamps between flushes. See [#10808](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10808).
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/),[vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/),[vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): fix rare unbounded shutdown delay when config reload takes longer than `-configCheckInterval`. See [#11107](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11107). Thanks to @PleasingFungus for contribution.
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix corrupted metrics metadata when a response contains multiple rows. See [#11115](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11115). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): fix potential corruption of remote-write metadata `Unit` values. See [#11120](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11120). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/): do not fail backup list if directory is absent while using `fs://` destination to align with other protocols. See [6c3c548](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/6c3c548ddb0385b749e731f52276f130e2a4e4a8)
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): prevent more cases of panic during directory deletion on `NFS`-based mounts. See [#11060](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11060).
|
||||
|
||||
## [v1.145.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.145.0)
|
||||
|
||||
|
||||
103
docs/victoriametrics/vmestimator.md
Normal file
103
docs/victoriametrics/vmestimator.md
Normal file
@@ -0,0 +1,103 @@
|
||||
---
|
||||
weight: 3
|
||||
menu:
|
||||
docs:
|
||||
parent: victoriametrics
|
||||
weight: 13
|
||||
title: vmestimator
|
||||
tags:
|
||||
- metrics
|
||||
- cardinality
|
||||
aliases:
|
||||
- /vmestimator.html
|
||||
- /vmestimator/index.html
|
||||
- /vmestimator/
|
||||
---
|
||||
|
||||
`vmestimator` is a cardinality estimator that receives Prometheus remote write streams and exposes approximate time series cardinality as metrics.
|
||||
|
||||
It is useful for tracking how many unique time series flow through the system: in total, per metric name, or broken down by specific labels.
|
||||
|
||||
## How it works
|
||||
|
||||
Running:
|
||||
```
|
||||
go run ./app/vmestimator/... -config=streams.yaml -httpListenAddr=:8490
|
||||
```
|
||||
|
||||
Configuration:
|
||||
|
||||
```yaml
|
||||
streams:
|
||||
# Track total cardinality with no grouping.
|
||||
- interval: '1h'
|
||||
|
||||
# Track cardinality grouped by metric name.
|
||||
- interval: '1h'
|
||||
group_by: ["__name__"]
|
||||
|
||||
# Track cardinality grouped by job label.
|
||||
- interval: '1m'
|
||||
group_by: ["job"]
|
||||
|
||||
# Track cardinality grouped by tenant info
|
||||
- group_by: ["vm_account_id", "vm_project_id"]
|
||||
|
||||
# Track cardinality of jobs, with extra labels on the output metrics.
|
||||
- group_by: ["job"]
|
||||
labels:
|
||||
region: 'eu-central-1'
|
||||
env: 'production'
|
||||
```
|
||||
|
||||
Fields:
|
||||
- `group_by` (optional): list of label names to split cardinality by; each distinct combination gets its own estimate
|
||||
- `group_limit` (optional): maximum number of distinct groups to track; excess groups are counted in a rejected sketch but not individually; defaults to `10000`
|
||||
- `buckets` (optional): number of internal shards for parallel ingestion; defaults to `min(20, availableCPUs)`
|
||||
- `labels` (optional): extra labels attached to all output metrics for this estimator
|
||||
- `interval` (optional): how often to rotate (reset) counters; defaults to `5m`
|
||||
- `hll_precision` (optional): HyperLogLog precision, must be in range `[4, 18]`; higher values yield more accurate estimates at the cost of more memory; defaults to `14`
|
||||
- `hll_sparse` (optional): whether to use sparse HyperLogLog representation, which reduces memory for low-cardinality groups; defaults to `true`
|
||||
|
||||
Cardinality generator:
|
||||
|
||||
```
|
||||
go run ./app/cegen/main.go -cardI=100 -cardY=20 -template="foo{instance=\"127.0.0.[cardI]\",job=\"ametric[cardY]\"}"
|
||||
```
|
||||
|
||||
## Metrics
|
||||
|
||||
By default, cardinality estimates are merged with regular metrics and exposed at `/metrics`.
|
||||
|
||||
This behavior is controlled by the `-cardinalityMetrics.exposeAt` flag:
|
||||
- `-cardinalityMetrics.exposeAt=/metrics` (default): cardinality metrics merged with regular metrics at `/metrics`
|
||||
- `-cardinalityMetrics.exposeAt=/cardinality/metrics`: only cardinality metrics exposed at that path
|
||||
- `-cardinalityMetrics.exposeAt=`: cardinality metrics not exposed via HTTP
|
||||
|
||||
All metrics include `interval`, `group_by_keys`, and `group_by_values` labels. Extra labels from the `labels` config field are inserted between `interval` and `group_by_keys` (sorted alphabetically).
|
||||
|
||||
**Without grouping** (`group_by_keys` is `__global__` and `group_by_values` is not set):
|
||||
```
|
||||
cardinality_estimate{interval="1h0m0s",group_by_keys="__global__"} 142300
|
||||
```
|
||||
|
||||
**With grouping** — one summary line (total distinct group count) plus one line per distinct label value combination. Each per-group line also includes individual `by_{key}="{val}"` labels for each group key:
|
||||
```
|
||||
cardinality_estimate{interval="5m0s",group_by_keys="__group__",group_by_values="instance,job"} 2
|
||||
cardinality_estimate{interval="5m0s",group_by_keys="instance,job",group_by_values="host1:9090,prometheus",by_instance="host1:9090",by_job="prometheus"} 312
|
||||
cardinality_estimate{interval="5m0s",group_by_keys="instance,job",group_by_values="host2:9100,node",by_instance="host2:9100",by_job="node"} 87
|
||||
```
|
||||
|
||||
**With extra labels:**
|
||||
```
|
||||
cardinality_estimate{interval="5m0s",env="production",region="eu-central-1",group_by_keys="job",group_by_values="prometheus",by_job="prometheus"} 312
|
||||
```
|
||||
|
||||
## Operational metrics
|
||||
|
||||
When grouping is enabled, `vmestimator` exposes per-bucket operational metrics at `/metrics`:
|
||||
|
||||
- `cestimator_group_estimator_size{groupBy, bucket}` — number of active groups in this bucket after the last rotation
|
||||
- `cestimator_group_estimator_rejected_size{groupBy, bucket}` — estimated number of distinct group values rejected since the last rotation because `group_limit` was reached
|
||||
- `cestimator_group_limit{groupBy, bucket}` — configured `group_limit` for this bucket
|
||||
|
||||
5
go.mod
5
go.mod
@@ -2,6 +2,8 @@ module github.com/VictoriaMetrics/VictoriaMetrics
|
||||
|
||||
go 1.26.4
|
||||
|
||||
replace github.com/axiomhq/hyperloglog => github.com/makasim/hyperloglog v0.0.10-reuse-memory
|
||||
|
||||
require (
|
||||
cloud.google.com/go/storage v1.62.3
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.22.0
|
||||
@@ -16,9 +18,11 @@ require (
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.25
|
||||
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.22.27
|
||||
github.com/aws/aws-sdk-go-v2/service/s3 v1.103.3
|
||||
github.com/axiomhq/hyperloglog v0.0.0-00010101000000-000000000000
|
||||
github.com/bmatcuk/doublestar/v4 v4.10.0
|
||||
github.com/cespare/xxhash/v2 v2.3.0
|
||||
github.com/cheggaaa/pb/v3 v3.1.7
|
||||
github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33
|
||||
github.com/gogo/protobuf v1.3.2
|
||||
github.com/golang/snappy v1.0.0
|
||||
github.com/google/go-cmp v0.7.0
|
||||
@@ -97,6 +101,7 @@ require (
|
||||
github.com/hashicorp/go-version v1.9.0 // indirect
|
||||
github.com/jpillora/backoff v1.0.0 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/kamstrup/intmap v0.5.2 // indirect
|
||||
github.com/knadh/koanf/maps v0.1.2 // indirect
|
||||
github.com/knadh/koanf/providers/confmap v1.0.0 // indirect
|
||||
github.com/knadh/koanf/v2 v2.3.5 // indirect
|
||||
|
||||
8
go.sum
8
go.sum
@@ -52,8 +52,6 @@ github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapp
|
||||
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.57.0/go.mod h1:YqwkQPrWSC7+byyc1VlKbWLBF5JsW5IoL6xUkemYSXk=
|
||||
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
|
||||
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
|
||||
github.com/VictoriaMetrics/VictoriaLogs v1.50.1-0.20260415124154-6b7a6357aec0 h1:2x1Tszv41PnCdSMumEtejz/On1RQ45kHQ+hhKT53sOk=
|
||||
github.com/VictoriaMetrics/VictoriaLogs v1.50.1-0.20260415124154-6b7a6357aec0/go.mod h1:fQtmzaSUL+HJmHozeAKmnTJTOMBT+vBccv/VWQEwhUQ=
|
||||
github.com/VictoriaMetrics/VictoriaLogs v1.121.1-0.20260616132739-c901a1e31cb3 h1:3eP8RRZitbga5EYiQ3IANrMPxpBwMAX4VA6akDaXwpU=
|
||||
github.com/VictoriaMetrics/VictoriaLogs v1.121.1-0.20260616132739-c901a1e31cb3/go.mod h1:H4sDxcvk6OmC6zOt++IlDyrwfbn4F1eSLwMpR+kpRt8=
|
||||
github.com/VictoriaMetrics/easyproto v1.2.0 h1:FJT9uNXA2isppFuJErbLqD306KoFlehl7Wn2dg/6oIE=
|
||||
@@ -152,6 +150,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1
|
||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/dennwc/varint v1.0.0 h1:kGNFFSSw8ToIy3obO/kKr8U9GZYUAxQEVuix4zfDWzE=
|
||||
github.com/dennwc/varint v1.0.0/go.mod h1:hnItb35rvZvJrbTALZtY/iQfDs48JKRG1RPpgziApxA=
|
||||
github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33 h1:ucRHb6/lvW/+mTEIGbvhcYU3S8+uSNkuMjx/qZFfhtM=
|
||||
github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw=
|
||||
github.com/digitalocean/godo v1.193.0 h1:CSbbUl5LufT75KPNvex3vDnBYjY2RfJWs7T3Ac7dHpA=
|
||||
github.com/digitalocean/godo v1.193.0/go.mod h1:xQsWpVCCbkDrWisHA72hPzPlnC+4W5w/McZY5ij9uvU=
|
||||
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
|
||||
@@ -294,6 +294,8 @@ github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2E
|
||||
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
|
||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||
github.com/kamstrup/intmap v0.5.2 h1:qnwBm1mh4XAnW9W9Ue9tZtTff8pS6+s6iKF6JRIV2Dk=
|
||||
github.com/kamstrup/intmap v0.5.2/go.mod h1:gWUVWHKzWj8xpJVFf5GC0O26bWmv3GqdnIX/LMT6Aq4=
|
||||
github.com/keybase/go-keychain v0.0.1 h1:way+bWYa6lDppZoZcgMbYsvC7GxljxrskdNInRtuthU=
|
||||
github.com/keybase/go-keychain v0.0.1/go.mod h1:PdEILRW3i9D8JcdM+FmY6RwkHGnhHxXwkPPMeUgOK1k=
|
||||
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
|
||||
@@ -316,6 +318,8 @@ github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0
|
||||
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
|
||||
github.com/linode/linodego v1.69.1 h1:f45N2MHR/oece2/ktTTCYmrlfse4//k3NgwcF5zbGZ0=
|
||||
github.com/linode/linodego v1.69.1/go.mod h1:Fha0NYsQSx5VZK1HQNJY/z/dIxxkFp+vb5veawbmAUw=
|
||||
github.com/makasim/hyperloglog v0.0.10-reuse-memory h1:tqMXSDlkVujI/aGYUm6uwt4lRUQcne22MOLcJBgLAGc=
|
||||
github.com/makasim/hyperloglog v0.0.10-reuse-memory/go.mod h1:YjX/dQqCR/7QYX0g8mu8UZAjpIenz1FKM71UEsjFoTo=
|
||||
github.com/mattn/go-colorable v0.1.15 h1:+u9SLTRGnXv73cEsnsmoZBom+dMU88B2M0aDcWy0/jY=
|
||||
github.com/mattn/go-colorable v0.1.15/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
|
||||
github.com/mattn/go-isatty v0.0.22 h1:j8l17JJ9i6VGPUFUYoTUKPSgKe/83EYU2zBC7YNKMw4=
|
||||
|
||||
@@ -179,6 +179,9 @@ func tryRemoveDir(dirPath string) bool {
|
||||
// times simultaneously and properly close it, fs caching may still
|
||||
// confuse NFS client.
|
||||
if err := os.RemoveAll(dirEntryPath); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return
|
||||
}
|
||||
if !isTemporaryNFSError(err) {
|
||||
logger.Fatalf("FATAL: cannot remove %q: %s", dirEntryPath, err)
|
||||
}
|
||||
@@ -203,8 +206,9 @@ func tryRemoveDir(dirPath string) bool {
|
||||
|
||||
deleteFilePath := filepath.Join(dirPath, deleteDirFilename)
|
||||
// Remove the deleteDirFilename file, since there are no other entries left in the directory.
|
||||
MustRemovePath(deleteFilePath)
|
||||
|
||||
if !tryRemovePath(deleteFilePath) {
|
||||
return false
|
||||
}
|
||||
// Sync the directory after removing the deleteDirFilename file in order to make sure
|
||||
// all the metadata files are removed at some exotic filesystems such as OSSFS2.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaLogs/issues/649
|
||||
@@ -212,7 +216,9 @@ func tryRemoveDir(dirPath string) bool {
|
||||
MustSyncPath(dirPath)
|
||||
|
||||
// Remove the dirPath itself
|
||||
MustRemovePath(dirPath)
|
||||
if !tryRemovePath(dirPath) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Do not sync the parent directory for the dirPath - the caller can do this if needed.
|
||||
// It is OK if the dirPath will remain undeleted after unclean shutdown - it will be deleted
|
||||
@@ -221,6 +227,23 @@ func tryRemoveDir(dirPath string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// tryRemovePath removes given path and returns true on success
|
||||
// or false if error is temporary NFS error
|
||||
func tryRemovePath(path string) bool {
|
||||
if err := os.Remove(path); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return true
|
||||
}
|
||||
if !isTemporaryNFSError(err) {
|
||||
logger.Fatalf("FATAL: cannot remove %q: %s", path, err)
|
||||
}
|
||||
nfsDirRemoveFailedAttempts.Inc()
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
var (
|
||||
dirRemoverWG sync.WaitGroup
|
||||
nfsDirRemoveFailedAttempts = metrics.NewCounter(`vm_nfs_dir_remove_failed_attempts_total`)
|
||||
|
||||
16
vendor/github.com/axiomhq/hyperloglog/.gitignore
generated
vendored
Normal file
16
vendor/github.com/axiomhq/hyperloglog/.gitignore
generated
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
# Binaries for programs and plugins
|
||||
*.exe
|
||||
*.dll
|
||||
*.so
|
||||
*.dylib
|
||||
|
||||
# Test binary, build with `go test -c`
|
||||
*.test
|
||||
|
||||
# Output of the go coverage tool, specifically when used with LiteIDE
|
||||
*.out
|
||||
|
||||
# Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736
|
||||
.glide/
|
||||
|
||||
/vendor
|
||||
41
vendor/github.com/axiomhq/hyperloglog/Contributing.md
generated
vendored
Normal file
41
vendor/github.com/axiomhq/hyperloglog/Contributing.md
generated
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
## How to Contribute
|
||||
|
||||
👍🎉 First of all, thank you for your interest in Axiom-node! We'd love to accept your patches and contributions! 🎉👍
|
||||
|
||||
This project accepts contributions. In order to contribute, you should pay attention to a few guidelines:
|
||||
|
||||
## Reporting Issues
|
||||
|
||||
Bugs, feature requests, and development-related questions should be directed to our [GitHub issue tracker](https://github.com/axiomhq/hyperloglog/issues).
|
||||
|
||||
When reporting a bug, please try and provide as much context as possible such as your operating system, Go version and anything else that might be relevant to the bug. For feature requests, please explain what you're trying to do and how the requested feature would help you do that.
|
||||
|
||||
## Setup
|
||||
|
||||
[Fork](https://github.com/axiomhq/hyperloglog.git), then clone this repository:
|
||||
|
||||
```
|
||||
git clone https://github.com/axiomhq/hyperloglog.git
|
||||
cd hyperloglog
|
||||
cd demo
|
||||
go run hyperloglog_demo.go
|
||||
```
|
||||
|
||||
## Submitting Modifications
|
||||
|
||||
1. It's generally best to start by opening a new issue describing the bug or feature you're intending to fix. Even if you think it's relatively minor, it's helpful to know what people are working on. Mention in the initial issue that you are planning to work on that bug or feature so that it can be assigned to you.
|
||||
|
||||
2. Follow the normal process of [forking](https://docs.github.com/en/free-pro-team@latest/github/getting-started-with-github/fork-a-repo) the project, and set up a new branch to work in. It's important that each group of changes be done in separate branches in order to ensure that a pull request only includes the commits related to that bug or feature.
|
||||
|
||||
3. Go makes it very simple to ensure properly formatted code, so always run `go fmt` on your code before committing it.
|
||||
|
||||
4. Do your best to have [well-formatted commit messages](https://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html)
|
||||
for each change. This provides consistency throughout the project and ensures that commit messages are able to be formatted properly by various git tools.
|
||||
|
||||
5. Finally, push the commits to your fork and submit a [pull request](https://docs.github.com/en/free-pro-team@latest/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request)
|
||||
|
||||
### Once you've filed the PR:
|
||||
|
||||
- One or more maintainers will use GitHub's review feature to review your PR.
|
||||
- If the maintainer asks for any changes, edit your changes, push, and ask for another review.
|
||||
- If the maintainer decides to suggest some improvements or alternatives, modify and make improvements. Once your changes are approved, one of the project maintainers will merge them.
|
||||
19
vendor/github.com/axiomhq/hyperloglog/LICENSE
generated
vendored
Normal file
19
vendor/github.com/axiomhq/hyperloglog/LICENSE
generated
vendored
Normal file
@@ -0,0 +1,19 @@
|
||||
Copyright (c) 2021, Axiom, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
51
vendor/github.com/axiomhq/hyperloglog/README.md
generated
vendored
Normal file
51
vendor/github.com/axiomhq/hyperloglog/README.md
generated
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
# HyperLogLog - an algorithm for approximating the number of distinct elements
|
||||
|
||||
[GoDoc](https://godoc.org/github.com/axiomhq/hyperloglog) [Go Report Card](https://goreportcard.com/report/github.com/axiomhq/hyperloglog) [CircleCI](https://circleci.com/gh/axiomhq/hyperloglog/tree/master)
|
||||
|
||||
An improved version of [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) for the count-distinct problem, approximating the number of distinct elements in a multiset. This implementation offers enhanced performance, flexibility, and simplicity while maintaining accuracy.
|
||||
|
||||
## Note on Implementation History
|
||||
|
||||
The initial version of this work (tagged as v0.1.0) was based on ["Better with fewer bits: Improving the performance of cardinality estimation of large data streams - Qingjun Xiao, You Zhou, Shigang Chen"](https://www.cise.ufl.edu/~sgchen/Publications/XZC17.pdf). However, the current implementation has evolved significantly from this original basis, notably moving away from the tailcut method.
|
||||
|
||||
## Current Implementation
|
||||
|
||||
The current implementation is based on the LogLog-Beta algorithm, as described in:
|
||||
|
||||
["LogLog-Beta and More: A New Algorithm for Cardinality Estimation Based on LogLog Counting"](https://arxiv.org/pdf/1612.02284) by Jason Qin, Denys Kim, and Yumei Tung (2016).
|
||||
|
||||
Key features of the current implementation:
|
||||
* **Metro hash** used instead of xxhash
|
||||
* **Sparse representation** for lower cardinalities (like HyperLogLog++)
|
||||
* **LogLog-Beta** for dynamic bias correction across all cardinalities
|
||||
* **8-bit registers** for convenience and simplified implementation
|
||||
* **Order-independent insertions and merging** for consistent results regardless of data input order
|
||||
* **Removal of tailcut method** for a more straightforward approach
|
||||
* **Flexible precision** allowing for 2^4 to 2^18 registers
|
||||
|
||||
This implementation is now more straightforward, efficient, and flexible, while remaining backwards compatible with previous versions. It provides a balance between precision, memory usage, speed, and ease of use.
|
||||
|
||||
## Precision and Memory Usage
|
||||
|
||||
This implementation allows for creating HyperLogLog sketches with arbitrary precision between 2^4 and 2^18 registers. The memory usage scales with the number of registers:
|
||||
|
||||
* Minimum (2^4 registers): 16 bytes
|
||||
* Default (2^14 registers): 16 KB
|
||||
* Maximum (2^18 registers): 256 KB
|
||||
|
||||
Users can choose the precision that best fits their use case, balancing memory usage against estimation accuracy.
|
||||
|
||||
## Note
|
||||
A big thank you to Prof. Shigang Chen and his team at the University of Florida who are actively conducting research around "Big Network Data".
|
||||
|
||||
## Contributing
|
||||
|
||||
Kindly check our [contributing guide](https://github.com/axiomhq/hyperloglog/blob/main/Contributing.md) on how to propose bugfixes and improvements and how to submit pull requests to the project.
|
||||
|
||||
## License
|
||||
|
||||
© Axiom, Inc., 2024
|
||||
|
||||
Distributed under MIT License (`The MIT License`).
|
||||
|
||||
See [LICENSE](LICENSE) for more information.
|
||||
273
vendor/github.com/axiomhq/hyperloglog/beta.go
generated
vendored
Normal file
273
vendor/github.com/axiomhq/hyperloglog/beta.go
generated
vendored
Normal file
@@ -0,0 +1,273 @@
|
||||
package hyperloglog
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
)
|
||||
|
||||
var betaMap = map[uint8]func(float64) float64{
|
||||
4: beta4,
|
||||
5: beta5,
|
||||
6: beta6,
|
||||
7: beta7,
|
||||
8: beta8,
|
||||
9: beta9,
|
||||
10: beta10,
|
||||
11: beta11,
|
||||
12: beta12,
|
||||
13: beta13,
|
||||
14: beta14,
|
||||
15: beta15,
|
||||
16: beta16,
|
||||
17: beta17,
|
||||
18: beta18,
|
||||
}
|
||||
|
||||
func beta(p uint8, ez float64) float64 {
|
||||
f, ok := betaMap[p]
|
||||
if !ok {
|
||||
panic(fmt.Sprintf("invalid precision %d", p))
|
||||
}
|
||||
return f(ez)
|
||||
}
|
||||
|
||||
// beta4 evaluates the bias-correction polynomial for precision p=4.
//
// Coefficients, for ez followed by ln(ez+1) raised to the powers 1 through 7:
// [-0.582581413904517,-1.935300357560050,11.079323758035073,-22.131357446444323,22.505391846630037,-12.000723834917984,3.220579408194167,-0.342225302271235]
func beta4(ez float64) float64 {
	lnz := math.Log(ez + 1)
	lnz2 := math.Pow(lnz, 2)
	lnz3 := math.Pow(lnz, 3)
	lnz4 := math.Pow(lnz, 4)
	lnz5 := math.Pow(lnz, 5)
	lnz6 := math.Pow(lnz, 6)
	lnz7 := math.Pow(lnz, 7)
	return -0.582581413904517*ez +
		-1.935300357560050*lnz +
		11.079323758035073*lnz2 +
		-22.131357446444323*lnz3 +
		22.505391846630037*lnz4 +
		-12.000723834917984*lnz5 +
		3.220579408194167*lnz6 +
		-0.342225302271235*lnz7
}
|
||||
|
||||
// beta5 evaluates the bias-correction polynomial for precision p=5.
//
// Coefficients, for ez followed by ln(ez+1) raised to the powers 1 through 7:
// [-0.7518999460733967,-0.9590030077748760,5.5997371322141607,-8.2097636999765520,6.5091254894472037,-2.6830293734323729,0.5612891113138221,-0.0463331622196545]
func beta5(ez float64) float64 {
	lnz := math.Log(ez + 1)
	lnz2 := math.Pow(lnz, 2)
	lnz3 := math.Pow(lnz, 3)
	lnz4 := math.Pow(lnz, 4)
	lnz5 := math.Pow(lnz, 5)
	lnz6 := math.Pow(lnz, 6)
	lnz7 := math.Pow(lnz, 7)
	return -0.7518999460733967*ez +
		-0.9590030077748760*lnz +
		5.5997371322141607*lnz2 +
		-8.2097636999765520*lnz3 +
		6.5091254894472037*lnz4 +
		-2.6830293734323729*lnz5 +
		0.5612891113138221*lnz6 +
		-0.0463331622196545*lnz7
}
|
||||
|
||||
// beta6 evaluates the bias-correction polynomial for precision p=6.
//
// Coefficients, for ez followed by ln(ez+1) raised to the powers 1 through 7:
// [29.8257900969619634,-31.3287083337725925,-10.5942523036582283,-11.5720125689099618,3.8188754373907492,-2.4160130328530811,0.4542208940970826,-0.0575155452020420]
func beta6(ez float64) float64 {
	lnz := math.Log(ez + 1)
	lnz2 := math.Pow(lnz, 2)
	lnz3 := math.Pow(lnz, 3)
	lnz4 := math.Pow(lnz, 4)
	lnz5 := math.Pow(lnz, 5)
	lnz6 := math.Pow(lnz, 6)
	lnz7 := math.Pow(lnz, 7)
	return 29.8257900969619634*ez +
		-31.3287083337725925*lnz +
		-10.5942523036582283*lnz2 +
		-11.5720125689099618*lnz3 +
		3.8188754373907492*lnz4 +
		-2.4160130328530811*lnz5 +
		0.4542208940970826*lnz6 +
		-0.0575155452020420*lnz7
}
|
||||
|
||||
// beta7 evaluates the bias-correction polynomial for precision p=7.
//
// Coefficients, for ez followed by ln(ez+1) raised to the powers 1 through 7:
// [2.8102921290820060,-3.9780498518175995,1.3162680041351582,-3.9252486335805901,2.0080835753946471,-0.7527151937556955,0.1265569894242751,-0.0109946438726240]
func beta7(ez float64) float64 {
	lnz := math.Log(ez + 1)
	lnz2 := math.Pow(lnz, 2)
	lnz3 := math.Pow(lnz, 3)
	lnz4 := math.Pow(lnz, 4)
	lnz5 := math.Pow(lnz, 5)
	lnz6 := math.Pow(lnz, 6)
	lnz7 := math.Pow(lnz, 7)
	return 2.8102921290820060*ez +
		-3.9780498518175995*lnz +
		1.3162680041351582*lnz2 +
		-3.9252486335805901*lnz3 +
		2.0080835753946471*lnz4 +
		-0.7527151937556955*lnz5 +
		0.1265569894242751*lnz6 +
		-0.0109946438726240*lnz7
}
|
||||
|
||||
// beta8 evaluates the bias-correction polynomial for precision p=8.
//
// Coefficients, for ez followed by ln(ez+1) raised to the powers 1 through 7:
// [1.00633544887550519,-2.00580666405112407,1.64369749366514117,-2.70560809940566172,1.39209980244222598,-0.46470374272183190,0.07384282377269775,-0.00578554885254223]
func beta8(ez float64) float64 {
	lnz := math.Log(ez + 1)
	lnz2 := math.Pow(lnz, 2)
	lnz3 := math.Pow(lnz, 3)
	lnz4 := math.Pow(lnz, 4)
	lnz5 := math.Pow(lnz, 5)
	lnz6 := math.Pow(lnz, 6)
	lnz7 := math.Pow(lnz, 7)
	return 1.00633544887550519*ez +
		-2.00580666405112407*lnz +
		1.64369749366514117*lnz2 +
		-2.70560809940566172*lnz3 +
		1.39209980244222598*lnz4 +
		-0.46470374272183190*lnz5 +
		0.07384282377269775*lnz6 +
		-0.00578554885254223*lnz7
}
|
||||
|
||||
// beta9 evaluates the bias-correction polynomial for precision p=9.
//
// Coefficients, for ez followed by ln(ez+1) raised to the powers 1 through 7:
// [-0.09415657458167959,-0.78130975924550528,1.71514946750712460,-1.73711250406516338,0.86441508489048924,-0.23819027465047218,0.03343448400269076,-0.00207858528178157]
func beta9(ez float64) float64 {
	lnz := math.Log(ez + 1)
	lnz2 := math.Pow(lnz, 2)
	lnz3 := math.Pow(lnz, 3)
	lnz4 := math.Pow(lnz, 4)
	lnz5 := math.Pow(lnz, 5)
	lnz6 := math.Pow(lnz, 6)
	lnz7 := math.Pow(lnz, 7)
	return -0.09415657458167959*ez +
		-0.78130975924550528*lnz +
		1.71514946750712460*lnz2 +
		-1.73711250406516338*lnz3 +
		0.86441508489048924*lnz4 +
		-0.23819027465047218*lnz5 +
		0.03343448400269076*lnz6 +
		-0.00207858528178157*lnz7
}
|
||||
|
||||
// beta10 evaluates the bias-correction polynomial for precision p=10.
//
// Coefficients, for ez followed by ln(ez+1) raised to the powers 1 through 7:
// [-0.25935400670790054,-0.52598301999805808,1.48933034925876839,-1.29642714084993571,0.62284756217221615,-0.15672326770251041,0.02054415903878563,-0.00112488483925502]
func beta10(ez float64) float64 {
	lnz := math.Log(ez + 1)
	lnz2 := math.Pow(lnz, 2)
	lnz3 := math.Pow(lnz, 3)
	lnz4 := math.Pow(lnz, 4)
	lnz5 := math.Pow(lnz, 5)
	lnz6 := math.Pow(lnz, 6)
	lnz7 := math.Pow(lnz, 7)
	return -0.25935400670790054*ez +
		-0.52598301999805808*lnz +
		1.48933034925876839*lnz2 +
		-1.29642714084993571*lnz3 +
		0.62284756217221615*lnz4 +
		-0.15672326770251041*lnz5 +
		0.02054415903878563*lnz6 +
		-0.00112488483925502*lnz7
}
|
||||
|
||||
// beta11 evaluates the bias-correction polynomial for precision p=11.
//
// Coefficients, for ez followed by ln(ez+1) raised to the powers 1 through 7:
// [-4.32325553856025e-01,-1.08450736399632e-01,6.09156550741120e-01,-1.65687801845180e-02,-7.95829341087617e-02,4.71830602102918e-02,-7.81372902346934e-03,5.84268708489995e-04]
func beta11(ez float64) float64 {
	lnz := math.Log(ez + 1)
	lnz2 := math.Pow(lnz, 2)
	lnz3 := math.Pow(lnz, 3)
	lnz4 := math.Pow(lnz, 4)
	lnz5 := math.Pow(lnz, 5)
	lnz6 := math.Pow(lnz, 6)
	lnz7 := math.Pow(lnz, 7)
	return -0.432325553856025*ez +
		-0.108450736399632*lnz +
		0.609156550741120*lnz2 +
		-0.0165687801845180*lnz3 +
		-0.0795829341087617*lnz4 +
		0.0471830602102918*lnz5 +
		-0.00781372902346934*lnz6 +
		0.000584268708489995*lnz7
}
|
||||
|
||||
// beta12 evaluates the bias-correction polynomial for precision p=12.
//
// Coefficients, for ez followed by ln(ez+1) raised to the powers 1 through 7:
// [-3.84979202588598e-01,1.83162233114364e-01,1.30396688841854e-01,7.04838927629266e-02,-8.95893971464453e-03,1.13010036741605e-02,-1.94285569591290e-03,2.25435774024964e-04]
//
// NOTE(review): the fourth-power coefficient used in the code (-0.0089589397146453)
// has one digit fewer than the listed -8.95893971464453e-03; confirm which one is intended.
func beta12(ez float64) float64 {
	lnz := math.Log(ez + 1)
	lnz2 := math.Pow(lnz, 2)
	lnz3 := math.Pow(lnz, 3)
	lnz4 := math.Pow(lnz, 4)
	lnz5 := math.Pow(lnz, 5)
	lnz6 := math.Pow(lnz, 6)
	lnz7 := math.Pow(lnz, 7)
	return -0.384979202588598*ez +
		0.183162233114364*lnz +
		0.130396688841854*lnz2 +
		0.0704838927629266*lnz3 +
		-0.0089589397146453*lnz4 +
		0.0113010036741605*lnz5 +
		-0.00194285569591290*lnz6 +
		0.000225435774024964*lnz7
}
|
||||
|
||||
// beta13 evaluates the bias-correction polynomial for precision p=13.
//
// Coefficients, for ez followed by ln(ez+1) raised to the powers 1 through 7:
// [-0.41655270946462997,-0.22146677040685156,0.38862131236999947,0.45340979746062371,-0.36264738324476375,0.12304650053558529,-0.01701540384555510,0.00102750367080838]
func beta13(ez float64) float64 {
	lnz := math.Log(ez + 1)
	lnz2 := math.Pow(lnz, 2)
	lnz3 := math.Pow(lnz, 3)
	lnz4 := math.Pow(lnz, 4)
	lnz5 := math.Pow(lnz, 5)
	lnz6 := math.Pow(lnz, 6)
	lnz7 := math.Pow(lnz, 7)
	return -0.41655270946462997*ez +
		-0.22146677040685156*lnz +
		0.38862131236999947*lnz2 +
		0.45340979746062371*lnz3 +
		-0.36264738324476375*lnz4 +
		0.12304650053558529*lnz5 +
		-0.01701540384555510*lnz6 +
		0.00102750367080838*lnz7
}
|
||||
|
||||
// beta14 evaluates the bias-correction polynomial for precision p=14.
//
// Coefficients, for ez followed by ln(ez+1) raised to the powers 1 through 7:
// [-3.71009760230692e-01,9.78811941207509e-03,1.85796293324165e-01,2.03015527328432e-01,-1.16710521803686e-01,4.31106699492820e-02,-5.99583540511831e-03,4.49704299509437e-04]
func beta14(ez float64) float64 {
	lnz := math.Log(ez + 1)
	lnz2 := math.Pow(lnz, 2)
	lnz3 := math.Pow(lnz, 3)
	lnz4 := math.Pow(lnz, 4)
	lnz5 := math.Pow(lnz, 5)
	lnz6 := math.Pow(lnz, 6)
	lnz7 := math.Pow(lnz, 7)
	return -0.371009760230692*ez +
		0.00978811941207509*lnz +
		0.185796293324165*lnz2 +
		0.203015527328432*lnz3 +
		-0.116710521803686*lnz4 +
		0.0431106699492820*lnz5 +
		-0.00599583540511831*lnz6 +
		0.000449704299509437*lnz7
}
|
||||
|
||||
// beta15 evaluates the bias-correction polynomial for precision p=15.
//
// Coefficients, for ez followed by ln(ez+1) raised to the powers 1 through 7:
// [-0.38215145543875273,-0.89069400536090837,0.37602335774678869,0.99335977440682377,-0.65577441638318956,0.18332342129703610,-0.02241529633062872,0.00121399789330194]
func beta15(ez float64) float64 {
	lnz := math.Log(ez + 1)
	lnz2 := math.Pow(lnz, 2)
	lnz3 := math.Pow(lnz, 3)
	lnz4 := math.Pow(lnz, 4)
	lnz5 := math.Pow(lnz, 5)
	lnz6 := math.Pow(lnz, 6)
	lnz7 := math.Pow(lnz, 7)
	return -0.38215145543875273*ez +
		-0.89069400536090837*lnz +
		0.37602335774678869*lnz2 +
		0.99335977440682377*lnz3 +
		-0.65577441638318956*lnz4 +
		0.18332342129703610*lnz5 +
		-0.02241529633062872*lnz6 +
		0.00121399789330194*lnz7
}
|
||||
|
||||
// beta16 evaluates the bias-correction polynomial for precision p=16.
//
// Coefficients, for ez followed by ln(ez+1) raised to the powers 1 through 7:
// [-0.37331876643753059,-1.41704077448122989,0.40729184796612533,1.56152033906584164,-0.99242233534286128,0.26064681399483092,-0.03053811369682807,0.00155770210179105]
func beta16(ez float64) float64 {
	lnz := math.Log(ez + 1)
	lnz2 := math.Pow(lnz, 2)
	lnz3 := math.Pow(lnz, 3)
	lnz4 := math.Pow(lnz, 4)
	lnz5 := math.Pow(lnz, 5)
	lnz6 := math.Pow(lnz, 6)
	lnz7 := math.Pow(lnz, 7)
	return -0.37331876643753059*ez +
		-1.41704077448122989*lnz +
		0.40729184796612533*lnz2 +
		1.56152033906584164*lnz3 +
		-0.99242233534286128*lnz4 +
		0.26064681399483092*lnz5 +
		-0.03053811369682807*lnz6 +
		0.00155770210179105*lnz7
}
|
||||
|
||||
// beta17 evaluates the bias-correction polynomial for precision p=17.
//
// Coefficients, for ez followed by ln(ez+1) raised to the powers 1 through 7:
// [-0.36775502299404605,0.53831422351377967,0.76970289278767923,0.55002583586450560,-0.74575588261146941,0.25711835785821952,-0.03437902606864149,0.00185949146371616]
func beta17(ez float64) float64 {
	lnz := math.Log(ez + 1)
	lnz2 := math.Pow(lnz, 2)
	lnz3 := math.Pow(lnz, 3)
	lnz4 := math.Pow(lnz, 4)
	lnz5 := math.Pow(lnz, 5)
	lnz6 := math.Pow(lnz, 6)
	lnz7 := math.Pow(lnz, 7)
	return -0.36775502299404605*ez +
		0.53831422351377967*lnz +
		0.76970289278767923*lnz2 +
		0.55002583586450560*lnz3 +
		-0.74575588261146941*lnz4 +
		0.25711835785821952*lnz5 +
		-0.03437902606864149*lnz6 +
		0.00185949146371616*lnz7
}
|
||||
|
||||
// beta18 evaluates the bias-correction polynomial for precision p=18.
//
// Coefficients, for ez followed by ln(ez+1) raised to the powers 1 through 7:
// [-0.36479623325960542,0.99730412328635032,1.55354386230081221,1.25932677198028919,-1.53325948209110163,0.47801042200056593,-0.05951025172951174,0.00291076804642205]
func beta18(ez float64) float64 {
	lnz := math.Log(ez + 1)
	lnz2 := math.Pow(lnz, 2)
	lnz3 := math.Pow(lnz, 3)
	lnz4 := math.Pow(lnz, 4)
	lnz5 := math.Pow(lnz, 5)
	lnz6 := math.Pow(lnz, 6)
	lnz7 := math.Pow(lnz, 7)
	return -0.36479623325960542*ez +
		0.99730412328635032*lnz +
		1.55354386230081221*lnz2 +
		1.25932677198028919*lnz3 +
		-1.53325948209110163*lnz4 +
		0.47801042200056593*lnz5 +
		-0.05951025172951174*lnz6 +
		0.00291076804642205*lnz7
}
|
||||
176
vendor/github.com/axiomhq/hyperloglog/compressed.go
generated
vendored
Normal file
176
vendor/github.com/axiomhq/hyperloglog/compressed.go
generated
vendored
Normal file
@@ -0,0 +1,176 @@
|
||||
package hyperloglog
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"slices"
|
||||
)
|
||||
|
||||
// Original author of this file is github.com/clarkduvall/hyperloglog
|
||||
|
||||
type iterator struct {
|
||||
i int
|
||||
last uint32
|
||||
v *compressedList
|
||||
}
|
||||
|
||||
func (iter *iterator) Next() uint32 {
|
||||
n, i := iter.v.decode(iter.i, iter.last)
|
||||
iter.last = n
|
||||
iter.i = i
|
||||
return n
|
||||
}
|
||||
|
||||
func (iter *iterator) Peek() (uint32, int) {
|
||||
return iter.v.decode(iter.i, iter.last)
|
||||
}
|
||||
|
||||
func (iter *iterator) Advance(last uint32, i int) {
|
||||
iter.last = last
|
||||
iter.i = i
|
||||
}
|
||||
|
||||
func (iter iterator) HasNext() bool {
|
||||
return iter.i < iter.v.Len()
|
||||
}
|
||||
|
||||
type compressedList struct {
|
||||
count uint32
|
||||
last uint32
|
||||
b variableLengthList
|
||||
}
|
||||
|
||||
func (v *compressedList) Clone() *compressedList {
|
||||
if v == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
newV := &compressedList{
|
||||
count: v.count,
|
||||
last: v.last,
|
||||
}
|
||||
|
||||
newV.b = make(variableLengthList, len(v.b))
|
||||
copy(newV.b, v.b)
|
||||
return newV
|
||||
}
|
||||
|
||||
func (v *compressedList) reset() {
|
||||
if v == nil {
|
||||
return
|
||||
}
|
||||
|
||||
v.count = 0
|
||||
v.last = 0
|
||||
v.b = v.b[:0]
|
||||
}
|
||||
|
||||
func (v *compressedList) AppendBinary(data []byte) ([]byte, error) {
|
||||
// At least 4 bytes for the two fixed sized values
|
||||
data = slices.Grow(data, 4+4)
|
||||
|
||||
// Marshal the count and last values.
|
||||
data = append(data,
|
||||
// Number of items in the list.
|
||||
byte(v.count>>24),
|
||||
byte(v.count>>16),
|
||||
byte(v.count>>8),
|
||||
byte(v.count),
|
||||
// The last item in the list.
|
||||
byte(v.last>>24),
|
||||
byte(v.last>>16),
|
||||
byte(v.last>>8),
|
||||
byte(v.last),
|
||||
)
|
||||
|
||||
// Append the variableLengthList
|
||||
return v.b.AppendBinary(data)
|
||||
}
|
||||
|
||||
func (v *compressedList) UnmarshalBinary(data []byte) error {
|
||||
if len(data) < 12 {
|
||||
return ErrorTooShort
|
||||
}
|
||||
|
||||
// Set the count.
|
||||
v.count, data = binary.BigEndian.Uint32(data[:4]), data[4:]
|
||||
|
||||
// Set the last value.
|
||||
v.last, data = binary.BigEndian.Uint32(data[:4]), data[4:]
|
||||
|
||||
// Set the list.
|
||||
sz, data := binary.BigEndian.Uint32(data[:4]), data[4:]
|
||||
v.b = make([]uint8, sz)
|
||||
if uint32(len(data)) < sz {
|
||||
return ErrorTooShort
|
||||
}
|
||||
for i := uint32(0); i < sz; i++ {
|
||||
v.b[i] = data[i]
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func newCompressedList(capacity int) *compressedList {
|
||||
v := &compressedList{}
|
||||
v.b = make(variableLengthList, 0, capacity)
|
||||
return v
|
||||
}
|
||||
|
||||
func (v *compressedList) Len() int {
|
||||
return len(v.b)
|
||||
}
|
||||
|
||||
func (v *compressedList) decode(i int, last uint32) (uint32, int) {
|
||||
n, i := v.b.decode(i)
|
||||
return n + last, i
|
||||
}
|
||||
|
||||
func (v *compressedList) Append(x uint32) {
|
||||
v.count++
|
||||
v.b = v.b.Append(x - v.last)
|
||||
v.last = x
|
||||
}
|
||||
|
||||
func (v *compressedList) Iter() iterator {
|
||||
return iterator{0, 0, v}
|
||||
}
|
||||
|
||||
// variableLengthList is a byte buffer of uint32 values encoded in 7-bit
// groups, least significant group first; the high bit of a byte marks that
// another group follows.
type variableLengthList []uint8

// AppendBinary appends the length of v as a 4-byte big-endian integer
// followed by the raw bytes of v, and returns the extended slice.
// The returned error is always nil.
func (v variableLengthList) AppendBinary(data []byte) ([]byte, error) {
	n := len(v)

	// 4 bytes for the size prefix plus one byte per element.
	data = slices.Grow(data, 4+n)

	// Only the low 32 bits of the length are written.
	data = binary.BigEndian.AppendUint32(data, uint32(n))

	return append(data, v...), nil
}

// decode reads the value that starts at offset i and returns it together
// with the offset of the byte following it.
// It performs no bounds checking beyond the slice indexing itself.
func (v variableLengthList) decode(i int) (uint32, int) {
	var (
		value uint32
		shift uint
	)
	pos := i
	// Bytes with the high bit set carry 7 payload bits and announce a successor.
	for v[pos]&0x80 != 0 {
		value |= uint32(v[pos]&0x7f) << shift
		shift += 7
		pos++
	}
	// The terminating byte has the high bit clear.
	value |= uint32(v[pos]) << shift
	return value, pos + 1
}

// Append encodes x at the end of v and returns the extended list.
func (v variableLengthList) Append(x uint32) variableLengthList {
	for ; x >= 0x80; x >>= 7 {
		v = append(v, uint8(x)|0x80)
	}
	return append(v, uint8(x))
}
|
||||
439
vendor/github.com/axiomhq/hyperloglog/hyperloglog.go
generated
vendored
Normal file
439
vendor/github.com/axiomhq/hyperloglog/hyperloglog.go
generated
vendored
Normal file
@@ -0,0 +1,439 @@
|
||||
package hyperloglog
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"slices"
|
||||
"sync"
|
||||
)
|
||||
|
||||
const (
	// pp is the precision passed to encodeHash and decodeHash for entries
	// kept in the sparse representation.
	pp uint8 = 25
	// mp equals 2^pp; Estimate passes it to linearCount while the sketch is sparse.
	mp uint32 = 1 << pp
	// version is presumably the serialization format version; confirm against
	// the marshalling code.
	version = 2
)
|
||||
|
||||
type Sketch struct {
|
||||
p uint8
|
||||
m uint32
|
||||
alpha float64
|
||||
tmpSet set
|
||||
sparseList *compressedList
|
||||
regs []uint8
|
||||
}
|
||||
|
||||
// New returns a HyperLogLog Sketch with 2^14 registers (precision 14)
|
||||
func New() *Sketch { return New14() }
|
||||
|
||||
// New14 returns a HyperLogLog Sketch with 2^14 registers (precision 14)
|
||||
func New14() *Sketch { return newSketchNoError(14, true) }
|
||||
|
||||
// New16 returns a HyperLogLog Sketch with 2^16 registers (precision 16)
|
||||
func New16() *Sketch { return newSketchNoError(16, true) }
|
||||
|
||||
// NewNoSparse returns a HyperLogLog Sketch with 2^14 registers (precision 14) that will not use a sparse representation
|
||||
func NewNoSparse() *Sketch { return newSketchNoError(14, false) }
|
||||
|
||||
// New16NoSparse returns a HyperLogLog Sketch with 2^16 registers (precision 16) that will not use a sparse representation
|
||||
func New16NoSparse() *Sketch { return newSketchNoError(16, false) }
|
||||
|
||||
func newSketchNoError(precision uint8, sparse bool) *Sketch {
|
||||
sk, _ := NewSketch(precision, sparse)
|
||||
return sk
|
||||
}
|
||||
|
||||
func NewSketch(precision uint8, sparse bool) (*Sketch, error) {
|
||||
if precision < 4 || precision > 18 {
|
||||
return nil, fmt.Errorf("p has to be >= 4 and <= 18")
|
||||
}
|
||||
m := uint32(1) << precision
|
||||
s := &Sketch{
|
||||
m: m,
|
||||
p: precision,
|
||||
alpha: alpha(float64(m)),
|
||||
}
|
||||
if sparse {
|
||||
s.tmpSet = makeSet(0)
|
||||
s.sparseList = getCompressedList(0)
|
||||
} else {
|
||||
s.regs = make([]uint8, m)
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// sparse reports whether the sketch still uses the sparse representation,
// i.e. whether sparseList is set.
func (sk *Sketch) sparse() bool {
	return sk.sparseList != nil
}
|
||||
|
||||
// Clone returns a deep copy of sk.
|
||||
func (sk *Sketch) Clone() *Sketch {
|
||||
clone := *sk
|
||||
clone.regs = append([]uint8(nil), sk.regs...)
|
||||
clone.tmpSet = sk.tmpSet.Clone()
|
||||
clone.sparseList = sk.sparseList.Clone()
|
||||
return &clone
|
||||
}
|
||||
|
||||
func (sk *Sketch) Reset() {
|
||||
if sk.sparse() {
|
||||
sk.tmpSet.reset()
|
||||
sk.sparseList.reset()
|
||||
return
|
||||
}
|
||||
|
||||
clear(sk.regs)
|
||||
}
|
||||
|
||||
func (sk *Sketch) maybeToNormal() {
|
||||
if uint32(sk.tmpSet.Len())*100 > sk.m {
|
||||
sk.mergeSparse()
|
||||
|
||||
m := sk.m
|
||||
if m > 8096 {
|
||||
m -= m / 10
|
||||
}
|
||||
|
||||
if uint32(sk.sparseList.Len()) > m {
|
||||
sk.toNormal()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (sk *Sketch) Merge(other *Sketch) error {
|
||||
if other == nil {
|
||||
return nil
|
||||
}
|
||||
if sk.p != other.p {
|
||||
return errors.New("precisions must be equal")
|
||||
}
|
||||
|
||||
if sk.sparse() && other.sparse() {
|
||||
sk.mergeSparseSketch(other)
|
||||
} else {
|
||||
sk.mergeDenseSketch(other)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sk *Sketch) mergeSparseSketch(other *Sketch) {
|
||||
sk.tmpSet.Merge(other.tmpSet)
|
||||
for iter := other.sparseList.Iter(); iter.HasNext(); {
|
||||
sk.tmpSet.add(iter.Next())
|
||||
}
|
||||
sk.maybeToNormal()
|
||||
}
|
||||
|
||||
func (sk *Sketch) mergeDenseSketch(other *Sketch) {
|
||||
if sk.sparse() {
|
||||
sk.toNormal()
|
||||
}
|
||||
|
||||
if other.sparse() {
|
||||
other.tmpSet.ForEach(func(k uint32) {
|
||||
i, r := decodeHash(k, other.p, pp)
|
||||
sk.insert(i, r)
|
||||
})
|
||||
for iter := other.sparseList.Iter(); iter.HasNext(); {
|
||||
i, r := decodeHash(iter.Next(), other.p, pp)
|
||||
sk.insert(i, r)
|
||||
}
|
||||
} else {
|
||||
for i, v := range other.regs {
|
||||
if v > sk.regs[i] {
|
||||
sk.regs[i] = v
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (sk *Sketch) toNormal() {
|
||||
if sk.tmpSet.Len() > 0 {
|
||||
sk.mergeSparse()
|
||||
}
|
||||
|
||||
sk.regs = make([]uint8, sk.m)
|
||||
for iter := sk.sparseList.Iter(); iter.HasNext(); {
|
||||
i, r := decodeHash(iter.Next(), sk.p, pp)
|
||||
sk.insert(i, r)
|
||||
}
|
||||
|
||||
sk.tmpSet = nilSet
|
||||
putCompressedList(sk.sparseList)
|
||||
sk.sparseList = nil
|
||||
}
|
||||
|
||||
func (sk *Sketch) insert(i uint32, r uint8) { sk.regs[i] = max(r, sk.regs[i]) }
|
||||
func (sk *Sketch) Insert(e []byte) { sk.InsertHash(hash(e)) }
|
||||
|
||||
func (sk *Sketch) InsertHash(x uint64) {
|
||||
if sk.sparse() {
|
||||
if sk.tmpSet.add(encodeHash(x, sk.p, pp)) {
|
||||
sk.maybeToNormal()
|
||||
}
|
||||
return
|
||||
}
|
||||
i, r := getPosVal(x, sk.p)
|
||||
sk.insert(uint32(i), r)
|
||||
}
|
||||
|
||||
func (sk *Sketch) Estimate() uint64 {
|
||||
if sk.sparse() {
|
||||
sk.mergeSparse()
|
||||
return uint64(linearCount(mp, mp-sk.sparseList.count))
|
||||
}
|
||||
|
||||
sum, ez := sumAndZeros(sk.regs)
|
||||
m := float64(sk.m)
|
||||
|
||||
est := sk.alpha * m * (m - ez) / (sum + beta(sk.p, ez))
|
||||
return uint64(est + 0.5)
|
||||
}
|
||||
|
||||
// compressedListPools holds one pool per capacity class of compressedList;
// getCompressedList and putCompressedList choose the class.
var compressedListPools = newCompressedListPools()

// newCompressedListPools returns an array of eight freshly allocated pools.
func newCompressedListPools() [8]*sync.Pool {
	var pools [8]*sync.Pool
	for idx := range pools {
		pools[idx] = new(sync.Pool)
	}
	return pools
}
|
||||
|
||||
func getCompressedList(requestedCapacity int) *compressedList {
|
||||
var pool *sync.Pool
|
||||
var capacity int
|
||||
if capacity = 256; requestedCapacity < capacity {
|
||||
pool = compressedListPools[0]
|
||||
} else if capacity = 512; requestedCapacity < capacity {
|
||||
pool = compressedListPools[1]
|
||||
} else if capacity = 1024; requestedCapacity < capacity {
|
||||
pool = compressedListPools[2]
|
||||
} else if capacity = 2048; requestedCapacity < capacity {
|
||||
pool = compressedListPools[3]
|
||||
} else if capacity = 4096; requestedCapacity < capacity {
|
||||
pool = compressedListPools[4]
|
||||
} else if capacity = 8196; requestedCapacity < capacity {
|
||||
pool = compressedListPools[5]
|
||||
} else if capacity = 16384; requestedCapacity < capacity {
|
||||
pool = compressedListPools[6]
|
||||
} else {
|
||||
capacity = requestedCapacity
|
||||
pool = compressedListPools[7]
|
||||
}
|
||||
|
||||
c := pool.Get()
|
||||
if c == nil {
|
||||
return newCompressedList(capacity - 1)
|
||||
}
|
||||
|
||||
c1 := c.(*compressedList)
|
||||
c1.b = slices.Grow(c1.b, capacity-1)
|
||||
return c1
|
||||
}
|
||||
|
||||
func putCompressedList(c *compressedList) {
|
||||
c.reset()
|
||||
capacity := cap(c.b)
|
||||
|
||||
if capacity < 256 {
|
||||
compressedListPools[0].Put(c)
|
||||
} else if capacity < 512 {
|
||||
compressedListPools[1].Put(c)
|
||||
} else if capacity < 1024 {
|
||||
compressedListPools[2].Put(c)
|
||||
} else if capacity < 2048 {
|
||||
compressedListPools[3].Put(c)
|
||||
} else if capacity < 4096 {
|
||||
compressedListPools[4].Put(c)
|
||||
} else if capacity < 8196 {
|
||||
compressedListPools[5].Put(c)
|
||||
} else if capacity < 16384 {
|
||||
compressedListPools[6].Put(c)
|
||||
} else {
|
||||
compressedListPools[7].Put(c)
|
||||
}
|
||||
}
|
||||
|
||||
func (sk *Sketch) mergeSparse() {
|
||||
if sk.tmpSet.Len() == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
keys := make([]uint32, 0, sk.tmpSet.Len())
|
||||
sk.tmpSet.ForEach(func(k uint32) {
|
||||
keys = append(keys, k)
|
||||
})
|
||||
slices.Sort(keys)
|
||||
|
||||
newList := getCompressedList(4*sk.tmpSet.Len() + sk.sparseList.Len())
|
||||
for iter, i := sk.sparseList.Iter(), 0; iter.HasNext() || i < len(keys); {
|
||||
if !iter.HasNext() {
|
||||
newList.Append(keys[i])
|
||||
i++
|
||||
continue
|
||||
}
|
||||
|
||||
if i >= len(keys) {
|
||||
newList.Append(iter.Next())
|
||||
continue
|
||||
}
|
||||
|
||||
x1, adv := iter.Peek()
|
||||
x2 := keys[i]
|
||||
if x1 == x2 {
|
||||
newList.Append(x1)
|
||||
iter.Advance(x1, adv)
|
||||
i++
|
||||
} else if x1 > x2 {
|
||||
newList.Append(x2)
|
||||
i++
|
||||
} else {
|
||||
newList.Append(x1)
|
||||
iter.Advance(x1, adv)
|
||||
}
|
||||
}
|
||||
|
||||
putCompressedList(sk.sparseList)
|
||||
|
||||
sk.sparseList = newList
|
||||
sk.tmpSet.m.Clear()
|
||||
}
|
||||
|
||||
// MarshalBinary implements the encoding.BinaryMarshaler interface.
|
||||
//
|
||||
// When the result will be appended to another buffer, consider using
|
||||
// AppendBinary to avoid additional allocations and copying.
|
||||
func (sk *Sketch) MarshalBinary() (data []byte, err error) {
|
||||
return sk.AppendBinary(nil)
|
||||
}
|
||||
|
||||
// AppendBinary implements the encoding.BinaryAppender interface.
|
||||
func (sk *Sketch) AppendBinary(data []byte) ([]byte, error) {
|
||||
data = slices.Grow(data, 8+len(sk.regs))
|
||||
// Marshal a version marker.
|
||||
data = append(data, version)
|
||||
// Marshal p.
|
||||
data = append(data, sk.p)
|
||||
// Marshal b
|
||||
data = append(data, 0)
|
||||
|
||||
if sk.sparse() {
|
||||
// It's using the sparse Sketch.
|
||||
data = append(data, byte(1))
|
||||
|
||||
// Add the tmp_set
|
||||
data, err := sk.tmpSet.AppendBinary(data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Add the sparse Sketch
|
||||
return sk.sparseList.AppendBinary(data)
|
||||
}
|
||||
|
||||
// It's using the dense Sketch.
|
||||
data = append(data, byte(0))
|
||||
|
||||
// Add the dense sketch Sketch.
|
||||
sz := len(sk.regs)
|
||||
data = append(data,
|
||||
byte(sz>>24),
|
||||
byte(sz>>16),
|
||||
byte(sz>>8),
|
||||
byte(sz),
|
||||
)
|
||||
|
||||
// Marshal each element in the list.
|
||||
for _, v := range sk.regs {
|
||||
data = append(data, byte(v))
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// ErrorTooShort is returned by UnmarshalBinary when the input is too short
|
||||
// to contain a serialized Sketch header.
|
||||
var ErrorTooShort = errors.New("too short binary")
|
||||
|
||||
// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface.
|
||||
func (sk *Sketch) UnmarshalBinary(data []byte) error {
|
||||
if len(data) < 8 {
|
||||
return ErrorTooShort
|
||||
}
|
||||
|
||||
// Unmarshal version. We may need this in the future if we make
|
||||
// non-compatible changes.
|
||||
v := data[0]
|
||||
|
||||
// Unmarshal p.
|
||||
p := data[1]
|
||||
|
||||
// Unmarshal b.
|
||||
b := data[2]
|
||||
|
||||
// Determine if we need a sparse Sketch
|
||||
sparse := data[3] == byte(1)
|
||||
|
||||
// Make a newSketch Sketch if the precision doesn't match or if the Sketch was used
|
||||
if sk.p != p || sk.regs != nil || sk.tmpSet.Len() > 0 || (sk.sparseList != nil && sk.sparseList.Len() > 0) {
|
||||
newh, err := NewSketch(p, sparse)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
*sk = *newh
|
||||
}
|
||||
|
||||
// h is now initialised with the correct p. We just need to fill the
|
||||
// rest of the details out.
|
||||
if sparse {
|
||||
// Using the sparse Sketch.
|
||||
|
||||
// Unmarshal the tmp_set.
|
||||
tssz := binary.BigEndian.Uint32(data[4:8])
|
||||
sk.tmpSet = makeSet(int(tssz))
|
||||
|
||||
// We need to unmarshal tssz values in total, and each value requires us
|
||||
// to read 4 bytes.
|
||||
tsLastByte := int((tssz * 4) + 8)
|
||||
for i := 8; i < tsLastByte; i += 4 {
|
||||
k := binary.BigEndian.Uint32(data[i : i+4])
|
||||
sk.tmpSet.add(k)
|
||||
}
|
||||
|
||||
// Unmarshal the sparse Sketch.
|
||||
return sk.sparseList.UnmarshalBinary(data[tsLastByte:])
|
||||
}
|
||||
|
||||
// Using the dense Sketch.
|
||||
sk.sparseList = nil
|
||||
sk.tmpSet = nilSet
|
||||
|
||||
if v == 1 {
|
||||
return sk.unmarshalBinaryV1(data[8:], b)
|
||||
}
|
||||
return sk.unmarshalBinaryV2(data)
|
||||
}
|
||||
|
||||
// sumAndZeros returns the sum of 2^-v over all registers (res) and the number
// of registers that are zero (ez).
func sumAndZeros(regs []uint8) (res, ez float64) {
	zeros := 0
	for i := range regs {
		r := regs[i]
		if r == 0 {
			zeros++
		}
		res += 1.0 / math.Pow(2.0, float64(r))
	}
	ez = float64(zeros)
	return res, ez
}
|
||||
|
||||
func (sk *Sketch) unmarshalBinaryV1(data []byte, b uint8) error {
|
||||
sk.regs = make([]uint8, len(data)*2)
|
||||
for i, v := range data {
|
||||
sk.regs[i*2] = uint8((v >> 4)) + b
|
||||
sk.regs[i*2+1] = uint8((v<<4)>>4) + b
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sk *Sketch) unmarshalBinaryV2(data []byte) error {
|
||||
sk.regs = data[8:]
|
||||
return nil
|
||||
}
|
||||
118
vendor/github.com/axiomhq/hyperloglog/sparse.go
generated
vendored
Normal file
118
vendor/github.com/axiomhq/hyperloglog/sparse.go
generated
vendored
Normal file
@@ -0,0 +1,118 @@
|
||||
package hyperloglog
|
||||
|
||||
import (
|
||||
"math/bits"
|
||||
"slices"
|
||||
|
||||
"github.com/kamstrup/intmap"
|
||||
)
|
||||
|
||||
func getIndex(k uint32, p, pp uint8) uint32 {
|
||||
if k&1 == 1 {
|
||||
return bextr32(k, 32-p, p)
|
||||
}
|
||||
return bextr32(k, pp-p+1, p)
|
||||
}
|
||||
|
||||
// Encode a hash to be used in the sparse representation.
|
||||
func encodeHash(x uint64, p, pp uint8) uint32 {
|
||||
idx := uint32(bextr(x, 64-pp, pp))
|
||||
if bextr(x, 64-pp, pp-p) == 0 {
|
||||
zeros := bits.LeadingZeros64((bextr(x, 0, 64-pp)<<pp)|(1<<pp-1)) + 1
|
||||
return idx<<7 | uint32(zeros<<1) | 1
|
||||
}
|
||||
return idx << 1
|
||||
}
|
||||
|
||||
// Decode a hash from the sparse representation.
|
||||
func decodeHash(k uint32, p, pp uint8) (uint32, uint8) {
|
||||
var r uint8
|
||||
if k&1 == 1 {
|
||||
r = uint8(bextr32(k, 1, 6)) + pp - p
|
||||
} else {
|
||||
// We can use the 64bit clz implementation and reduce the result
|
||||
// by 32 to get a clz for a 32bit word.
|
||||
r = uint8(bits.LeadingZeros64(uint64(k<<(32-pp+p-1))) - 31) // -32 + 1
|
||||
}
|
||||
return getIndex(k, p, pp), r
|
||||
}
|
||||
|
||||
type set struct {
|
||||
m *intmap.Set[uint32]
|
||||
}
|
||||
|
||||
func (s set) reset() {
|
||||
if s.m != nil {
|
||||
s.m.Clear()
|
||||
}
|
||||
}
|
||||
|
||||
var nilSet set
|
||||
|
||||
func makeSet(size int) set {
|
||||
return set{m: intmap.NewSet[uint32](size)}
|
||||
}
|
||||
|
||||
func (s set) ForEach(fn func(v uint32)) {
|
||||
s.m.ForEach(func(v uint32) bool {
|
||||
fn(v)
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
func (s set) Merge(other set) {
|
||||
other.m.ForEach(func(v uint32) bool {
|
||||
s.m.Add(v)
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
func (s set) Len() int {
|
||||
return s.m.Len()
|
||||
}
|
||||
|
||||
func (s set) add(v uint32) bool {
|
||||
return s.m.Add(v)
|
||||
}
|
||||
|
||||
func (s set) Clone() set {
|
||||
if s == nilSet {
|
||||
return nilSet
|
||||
}
|
||||
|
||||
newS := intmap.NewSet[uint32](s.m.Len())
|
||||
s.m.ForEach(func(v uint32) bool {
|
||||
newS.Add(v)
|
||||
return true
|
||||
})
|
||||
return set{m: newS}
|
||||
}
|
||||
|
||||
func (s *set) AppendBinary(data []byte) ([]byte, error) {
|
||||
// 4 bytes for the size of the set, and 4 bytes for each key.
|
||||
// list.
|
||||
data = slices.Grow(data, 4+(4*s.m.Len()))
|
||||
|
||||
// Length of the set. We only need 32 bits because the size of the set
|
||||
// couldn't exceed that on 32 bit architectures.
|
||||
sl := s.m.Len()
|
||||
data = append(data,
|
||||
byte(sl>>24),
|
||||
byte(sl>>16),
|
||||
byte(sl>>8),
|
||||
byte(sl),
|
||||
)
|
||||
|
||||
// Marshal each element in the set.
|
||||
s.m.ForEach(func(k uint32) bool {
|
||||
data = append(data,
|
||||
byte(k>>24),
|
||||
byte(k>>16),
|
||||
byte(k>>8),
|
||||
byte(k),
|
||||
)
|
||||
return true
|
||||
})
|
||||
|
||||
return data, nil
|
||||
}
|
||||
46
vendor/github.com/axiomhq/hyperloglog/utils.go
generated
vendored
Normal file
46
vendor/github.com/axiomhq/hyperloglog/utils.go
generated
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
package hyperloglog
|
||||
|
||||
import (
|
||||
"math"
|
||||
"math/bits"
|
||||
|
||||
metro "github.com/dgryski/go-metro"
|
||||
)
|
||||
|
||||
var hash = hashFunc
|
||||
|
||||
// alpha returns the estimator constant for m registers: fixed values for
// m = 16, 32 and 64, and 0.7213 / (1 + 1.079/m) for every other m.
func alpha(m float64) float64 {
	if m == 16 {
		return 0.673
	}
	if m == 32 {
		return 0.697
	}
	if m == 64 {
		return 0.709
	}
	return 0.7213 / (1 + 1.079/m)
}
|
||||
|
||||
func getPosVal(x uint64, p uint8) (uint64, uint8) {
|
||||
i := bextr(x, 64-p, p) // {x63,...,x64-p}
|
||||
w := x<<p | 1<<(p-1) // {x63-p,...,x0}
|
||||
rho := uint8(bits.LeadingZeros64(w)) + 1
|
||||
return i, rho
|
||||
}
|
||||
|
||||
// linearCount returns m * ln(m/v), computed in float64.
func linearCount(m uint32, v uint32) float64 {
	total := float64(m)
	ratio := total / float64(v)
	return total * math.Log(ratio)
}
|
||||
|
||||
// bextr returns the length-bit field of v that starts at bit start.
func bextr(v uint64, start, length uint8) uint64 {
	mask := uint64(1)<<length - 1
	return (v >> start) & mask
}
|
||||
|
||||
// bextr32 returns the length-bit field of v that starts at bit start.
func bextr32(v uint32, start, length uint8) uint32 {
	mask := uint32(1)<<length - 1
	return (v >> start) & mask
}
|
||||
|
||||
func hashFunc(e []byte) uint64 {
|
||||
return metro.Hash64(e, 1337)
|
||||
}
|
||||
24
vendor/github.com/dgryski/go-metro/LICENSE
generated
vendored
Normal file
24
vendor/github.com/dgryski/go-metro/LICENSE
generated
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
This package is a mechanical translation of the reference C++ code for
|
||||
MetroHash, available at https://github.com/jandrewrogers/MetroHash
|
||||
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2016 Damian Gryski
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
6
vendor/github.com/dgryski/go-metro/README
generated
vendored
Normal file
6
vendor/github.com/dgryski/go-metro/README
generated
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
MetroHash
|
||||
|
||||
This package is a mechanical translation of the reference C++ code for
|
||||
MetroHash, available at https://github.com/jandrewrogers/MetroHash
|
||||
|
||||
I claim no additional copyright over the original implementation.
|
||||
94
vendor/github.com/dgryski/go-metro/metro128.go
generated
vendored
Normal file
94
vendor/github.com/dgryski/go-metro/metro128.go
generated
vendored
Normal file
@@ -0,0 +1,94 @@
|
||||
package metro
|
||||
|
||||
import "encoding/binary"
|
||||
|
||||
// rotate_right rotates v right by k bits.
func rotate_right(v uint64, k uint) uint64 {
	low := v >> k
	high := v << (64 - k)
	return low | high
}
|
||||
|
||||
func Hash128(buffer []byte, seed uint64) (uint64, uint64) {
|
||||
|
||||
const (
|
||||
k0 = 0xC83A91E1
|
||||
k1 = 0x8648DBDB
|
||||
k2 = 0x7BDEC03B
|
||||
k3 = 0x2F5870A5
|
||||
)
|
||||
|
||||
ptr := buffer
|
||||
|
||||
var v [4]uint64
|
||||
|
||||
v[0] = (seed - k0) * k3
|
||||
v[1] = (seed + k1) * k2
|
||||
|
||||
if len(ptr) >= 32 {
|
||||
v[2] = (seed + k0) * k2
|
||||
v[3] = (seed - k1) * k3
|
||||
|
||||
for len(ptr) >= 32 {
|
||||
v[0] += binary.LittleEndian.Uint64(ptr) * k0
|
||||
ptr = ptr[8:]
|
||||
v[0] = rotate_right(v[0], 29) + v[2]
|
||||
v[1] += binary.LittleEndian.Uint64(ptr) * k1
|
||||
ptr = ptr[8:]
|
||||
v[1] = rotate_right(v[1], 29) + v[3]
|
||||
v[2] += binary.LittleEndian.Uint64(ptr) * k2
|
||||
ptr = ptr[8:]
|
||||
v[2] = rotate_right(v[2], 29) + v[0]
|
||||
v[3] += binary.LittleEndian.Uint64(ptr) * k3
|
||||
ptr = ptr[8:]
|
||||
v[3] = rotate_right(v[3], 29) + v[1]
|
||||
}
|
||||
|
||||
v[2] ^= rotate_right(((v[0]+v[3])*k0)+v[1], 21) * k1
|
||||
v[3] ^= rotate_right(((v[1]+v[2])*k1)+v[0], 21) * k0
|
||||
v[0] ^= rotate_right(((v[0]+v[2])*k0)+v[3], 21) * k1
|
||||
v[1] ^= rotate_right(((v[1]+v[3])*k1)+v[2], 21) * k0
|
||||
}
|
||||
|
||||
if len(ptr) >= 16 {
|
||||
v[0] += binary.LittleEndian.Uint64(ptr) * k2
|
||||
ptr = ptr[8:]
|
||||
v[0] = rotate_right(v[0], 33) * k3
|
||||
v[1] += binary.LittleEndian.Uint64(ptr) * k2
|
||||
ptr = ptr[8:]
|
||||
v[1] = rotate_right(v[1], 33) * k3
|
||||
v[0] ^= rotate_right((v[0]*k2)+v[1], 45) * k1
|
||||
v[1] ^= rotate_right((v[1]*k3)+v[0], 45) * k0
|
||||
}
|
||||
|
||||
if len(ptr) >= 8 {
|
||||
v[0] += binary.LittleEndian.Uint64(ptr) * k2
|
||||
ptr = ptr[8:]
|
||||
v[0] = rotate_right(v[0], 33) * k3
|
||||
v[0] ^= rotate_right((v[0]*k2)+v[1], 27) * k1
|
||||
}
|
||||
|
||||
if len(ptr) >= 4 {
|
||||
v[1] += uint64(binary.LittleEndian.Uint32(ptr)) * k2
|
||||
ptr = ptr[4:]
|
||||
v[1] = rotate_right(v[1], 33) * k3
|
||||
v[1] ^= rotate_right((v[1]*k3)+v[0], 46) * k0
|
||||
}
|
||||
|
||||
if len(ptr) >= 2 {
|
||||
v[0] += uint64(binary.LittleEndian.Uint16(ptr)) * k2
|
||||
ptr = ptr[2:]
|
||||
v[0] = rotate_right(v[0], 33) * k3
|
||||
v[0] ^= rotate_right((v[0]*k2)+v[1], 22) * k1
|
||||
}
|
||||
|
||||
if len(ptr) >= 1 {
|
||||
v[1] += uint64(ptr[0]) * k2
|
||||
v[1] = rotate_right(v[1], 33) * k3
|
||||
v[1] ^= rotate_right((v[1]*k3)+v[0], 58) * k0
|
||||
}
|
||||
|
||||
v[0] += rotate_right((v[0]*k0)+v[1], 13)
|
||||
v[1] += rotate_right((v[1]*k1)+v[0], 37)
|
||||
v[0] += rotate_right((v[0]*k2)+v[1], 13)
|
||||
v[1] += rotate_right((v[1]*k3)+v[0], 37)
|
||||
|
||||
return v[0], v[1]
|
||||
}
|
||||
89
vendor/github.com/dgryski/go-metro/metro64.go
generated
vendored
Normal file
89
vendor/github.com/dgryski/go-metro/metro64.go
generated
vendored
Normal file
@@ -0,0 +1,89 @@
|
||||
//go:build noasm || !amd64 || !gc || purego
|
||||
// +build noasm !amd64 !gc purego
|
||||
|
||||
package metro
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
// Hash64 computes a 64-bit hash of buffer for the given seed.
//
// The input is consumed in 32-byte rounds while enough bytes remain, then in
// at most one 16-, 8-, 4-, 2- and 1-byte step, followed by a final mix.
func Hash64(buffer []byte, seed uint64) uint64 {
	const (
		k0 = 0xD6D018F5
		k1 = 0xA2AA033B
		k2 = 0x62992FC1
		k3 = 0x30BC5B29
	)

	le := binary.LittleEndian
	// ror rotates v right by k bits.
	ror := func(v uint64, k int) uint64 { return bits.RotateLeft64(v, -k) }

	rest := buffer
	h := (seed + k2) * k0

	if len(rest) >= 32 {
		a, b, c, d := h, h, h, h

		for ; len(rest) >= 32; rest = rest[32:] {
			a += le.Uint64(rest[:8]) * k0
			a = ror(a, 29) + c
			b += le.Uint64(rest[8:16]) * k1
			b = ror(b, 29) + d
			c += le.Uint64(rest[16:24]) * k2
			c = ror(c, 29) + a
			d += le.Uint64(rest[24:32]) * k3
			d = ror(d, 29) + b
		}

		c ^= ror(((a+d)*k0)+b, 37) * k1
		d ^= ror(((b+c)*k1)+a, 37) * k0
		a ^= ror(((a+c)*k0)+d, 37) * k1
		b ^= ror(((b+d)*k1)+c, 37) * k0
		h += a ^ b
	}

	if len(rest) >= 16 {
		a := h + (le.Uint64(rest[:8]) * k2)
		a = ror(a, 29) * k3
		b := h + (le.Uint64(rest[8:16]) * k2)
		b = ror(b, 29) * k3
		a ^= ror(a*k0, 21) + b
		b ^= ror(b*k3, 21) + a
		h += b
		rest = rest[16:]
	}

	if len(rest) >= 8 {
		h += le.Uint64(rest[:8]) * k3
		h ^= ror(h, 55) * k1
		rest = rest[8:]
	}

	if len(rest) >= 4 {
		h += uint64(le.Uint32(rest[:4])) * k3
		h ^= ror(h, 26) * k1
		rest = rest[4:]
	}

	if len(rest) >= 2 {
		h += uint64(le.Uint16(rest[:2])) * k3
		h ^= ror(h, 48) * k1
		rest = rest[2:]
	}

	if len(rest) >= 1 {
		h += uint64(rest[0]) * k3
		h ^= ror(h, 37) * k1
	}

	// Final mix.
	h ^= ror(h, 28)
	h *= k0
	h ^= ror(h, 29)

	return h
}
|
||||
|
||||
func Hash64Str(buffer string, seed uint64) uint64 {
|
||||
return Hash64([]byte(buffer), seed)
|
||||
}
|
||||
387
vendor/github.com/dgryski/go-metro/metro_amd64.s
generated
vendored
Normal file
387
vendor/github.com/dgryski/go-metro/metro_amd64.s
generated
vendored
Normal file
@@ -0,0 +1,387 @@
|
||||
// Code generated by command: go run asm.go -out metro_amd64.s -stubs metro_stub.go -pkg metro. DO NOT EDIT.
|
||||
|
||||
//go:build amd64 && gc && !purego && !noasm
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func Hash64(buffer []byte, seed uint64) uint64
|
||||
TEXT ·Hash64(SB), NOSPLIT, $0-40
|
||||
MOVQ seed+24(FP), AX
|
||||
MOVQ buffer_base+0(FP), CX
|
||||
MOVQ buffer_len+8(FP), DX
|
||||
MOVQ $0xd6d018f5, BX
|
||||
IMULQ BX, AX
|
||||
MOVQ $0x52bc33fedbe4cbb5, BX
|
||||
ADDQ BX, AX
|
||||
CMPQ DX, $0x20
|
||||
JLT after32
|
||||
MOVQ AX, BX
|
||||
MOVQ AX, SI
|
||||
MOVQ AX, DI
|
||||
MOVQ AX, R8
|
||||
|
||||
loop:
|
||||
MOVQ (CX), R9
|
||||
MOVQ $0xd6d018f5, R10
|
||||
IMULQ R10, R9
|
||||
ADDQ R9, BX
|
||||
RORQ $0x1d, BX
|
||||
ADDQ DI, BX
|
||||
MOVQ 8(CX), R9
|
||||
MOVQ $0xa2aa033b, R10
|
||||
IMULQ R10, R9
|
||||
ADDQ R9, SI
|
||||
RORQ $0x1d, SI
|
||||
ADDQ R8, SI
|
||||
MOVQ 16(CX), R9
|
||||
MOVQ $0x62992fc1, R10
|
||||
IMULQ R10, R9
|
||||
ADDQ R9, DI
|
||||
RORQ $0x1d, DI
|
||||
ADDQ BX, DI
|
||||
MOVQ 24(CX), R9
|
||||
MOVQ $0x30bc5b29, R10
|
||||
IMULQ R10, R9
|
||||
ADDQ R9, R8
|
||||
RORQ $0x1d, R8
|
||||
ADDQ SI, R8
|
||||
ADDQ $0x20, CX
|
||||
SUBQ $0x20, DX
|
||||
CMPQ DX, $0x20
|
||||
JGE loop
|
||||
MOVQ BX, R9
|
||||
ADDQ R8, R9
|
||||
MOVQ $0xd6d018f5, R10
|
||||
IMULQ R10, R9
|
||||
ADDQ SI, R9
|
||||
RORQ $0x25, R9
|
||||
MOVQ $0xa2aa033b, R10
|
||||
IMULQ R10, R9
|
||||
XORQ R9, DI
|
||||
MOVQ SI, R9
|
||||
ADDQ DI, R9
|
||||
MOVQ $0xa2aa033b, R10
|
||||
IMULQ R10, R9
|
||||
ADDQ BX, R9
|
||||
RORQ $0x25, R9
|
||||
MOVQ $0xd6d018f5, R10
|
||||
IMULQ R10, R9
|
||||
XORQ R9, R8
|
||||
MOVQ BX, R9
|
||||
ADDQ DI, R9
|
||||
MOVQ $0xd6d018f5, R10
|
||||
IMULQ R10, R9
|
||||
ADDQ R8, R9
|
||||
RORQ $0x25, R9
|
||||
MOVQ $0xa2aa033b, R10
|
||||
IMULQ R10, R9
|
||||
XORQ R9, BX
|
||||
MOVQ SI, R9
|
||||
ADDQ R8, R9
|
||||
MOVQ $0xa2aa033b, R8
|
||||
IMULQ R8, R9
|
||||
ADDQ DI, R9
|
||||
RORQ $0x25, R9
|
||||
MOVQ $0xd6d018f5, DI
|
||||
IMULQ DI, R9
|
||||
XORQ R9, SI
|
||||
XORQ SI, BX
|
||||
ADDQ BX, AX
|
||||
|
||||
after32:
|
||||
CMPQ DX, $0x10
|
||||
JLT after16
|
||||
MOVQ (CX), BX
|
||||
MOVQ $0x62992fc1, SI
|
||||
IMULQ SI, BX
|
||||
ADDQ AX, BX
|
||||
ADDQ $0x08, CX
|
||||
SUBQ $0x08, DX
|
||||
RORQ $0x1d, BX
|
||||
MOVQ $0x30bc5b29, SI
|
||||
IMULQ SI, BX
|
||||
MOVQ (CX), SI
|
||||
MOVQ $0x62992fc1, DI
|
||||
IMULQ DI, SI
|
||||
ADDQ AX, SI
|
||||
ADDQ $0x08, CX
|
||||
SUBQ $0x08, DX
|
||||
RORQ $0x1d, SI
|
||||
MOVQ $0x30bc5b29, DI
|
||||
IMULQ DI, SI
|
||||
MOVQ BX, DI
|
||||
MOVQ $0xd6d018f5, R8
|
||||
IMULQ R8, DI
|
||||
RORQ $0x15, DI
|
||||
ADDQ SI, DI
|
||||
XORQ DI, BX
|
||||
MOVQ SI, DI
|
||||
MOVQ $0x30bc5b29, R8
|
||||
IMULQ R8, DI
|
||||
RORQ $0x15, DI
|
||||
ADDQ BX, DI
|
||||
XORQ DI, SI
|
||||
ADDQ SI, AX
|
||||
|
||||
after16:
|
||||
CMPQ DX, $0x08
|
||||
JLT after8
|
||||
MOVQ (CX), BX
|
||||
MOVQ $0x30bc5b29, SI
|
||||
IMULQ SI, BX
|
||||
ADDQ BX, AX
|
||||
ADDQ $0x08, CX
|
||||
SUBQ $0x08, DX
|
||||
MOVQ AX, BX
|
||||
RORQ $0x37, BX
|
||||
MOVQ $0xa2aa033b, SI
|
||||
IMULQ SI, BX
|
||||
XORQ BX, AX
|
||||
|
||||
after8:
|
||||
CMPQ DX, $0x04
|
||||
JLT after4
|
||||
XORQ BX, BX
|
||||
MOVL (CX), BX
|
||||
MOVQ $0x30bc5b29, SI
|
||||
IMULQ SI, BX
|
||||
ADDQ BX, AX
|
||||
ADDQ $0x04, CX
|
||||
SUBQ $0x04, DX
|
||||
MOVQ AX, BX
|
||||
RORQ $0x1a, BX
|
||||
MOVQ $0xa2aa033b, SI
|
||||
IMULQ SI, BX
|
||||
XORQ BX, AX
|
||||
|
||||
after4:
|
||||
CMPQ DX, $0x02
|
||||
JLT after2
|
||||
XORQ BX, BX
|
||||
MOVW (CX), BX
|
||||
MOVQ $0x30bc5b29, SI
|
||||
IMULQ SI, BX
|
||||
ADDQ BX, AX
|
||||
ADDQ $0x02, CX
|
||||
SUBQ $0x02, DX
|
||||
MOVQ AX, BX
|
||||
RORQ $0x30, BX
|
||||
MOVQ $0xa2aa033b, SI
|
||||
IMULQ SI, BX
|
||||
XORQ BX, AX
|
||||
|
||||
after2:
|
||||
CMPQ DX, $0x01
|
||||
JLT after1
|
||||
MOVBQZX (CX), CX
|
||||
MOVQ $0x30bc5b29, DX
|
||||
IMULQ DX, CX
|
||||
ADDQ CX, AX
|
||||
MOVQ AX, CX
|
||||
RORQ $0x25, CX
|
||||
MOVQ $0xa2aa033b, DX
|
||||
IMULQ DX, CX
|
||||
XORQ CX, AX
|
||||
|
||||
after1:
|
||||
MOVQ AX, CX
|
||||
RORQ $0x1c, CX
|
||||
XORQ CX, AX
|
||||
MOVQ $0xd6d018f5, CX
|
||||
IMULQ CX, AX
|
||||
MOVQ AX, CX
|
||||
RORQ $0x1d, CX
|
||||
XORQ CX, AX
|
||||
MOVQ AX, ret+32(FP)
|
||||
RET
|
||||
|
||||
// func Hash64Str(buffer string, seed uint64) uint64
|
||||
TEXT ·Hash64Str(SB), NOSPLIT, $0-32
|
||||
MOVQ seed+16(FP), AX
|
||||
MOVQ buffer_base+0(FP), CX
|
||||
MOVQ buffer_len+8(FP), DX
|
||||
MOVQ $0xd6d018f5, BX
|
||||
IMULQ BX, AX
|
||||
MOVQ $0x52bc33fedbe4cbb5, BX
|
||||
ADDQ BX, AX
|
||||
CMPQ DX, $0x20
|
||||
JLT after32
|
||||
MOVQ AX, BX
|
||||
MOVQ AX, SI
|
||||
MOVQ AX, DI
|
||||
MOVQ AX, R8
|
||||
|
||||
loop:
|
||||
MOVQ (CX), R9
|
||||
MOVQ $0xd6d018f5, R10
|
||||
IMULQ R10, R9
|
||||
ADDQ R9, BX
|
||||
RORQ $0x1d, BX
|
||||
ADDQ DI, BX
|
||||
MOVQ 8(CX), R9
|
||||
MOVQ $0xa2aa033b, R10
|
||||
IMULQ R10, R9
|
||||
ADDQ R9, SI
|
||||
RORQ $0x1d, SI
|
||||
ADDQ R8, SI
|
||||
MOVQ 16(CX), R9
|
||||
MOVQ $0x62992fc1, R10
|
||||
IMULQ R10, R9
|
||||
ADDQ R9, DI
|
||||
RORQ $0x1d, DI
|
||||
ADDQ BX, DI
|
||||
MOVQ 24(CX), R9
|
||||
MOVQ $0x30bc5b29, R10
|
||||
IMULQ R10, R9
|
||||
ADDQ R9, R8
|
||||
RORQ $0x1d, R8
|
||||
ADDQ SI, R8
|
||||
ADDQ $0x20, CX
|
||||
SUBQ $0x20, DX
|
||||
CMPQ DX, $0x20
|
||||
JGE loop
|
||||
MOVQ BX, R9
|
||||
ADDQ R8, R9
|
||||
MOVQ $0xd6d018f5, R10
|
||||
IMULQ R10, R9
|
||||
ADDQ SI, R9
|
||||
RORQ $0x25, R9
|
||||
MOVQ $0xa2aa033b, R10
|
||||
IMULQ R10, R9
|
||||
XORQ R9, DI
|
||||
MOVQ SI, R9
|
||||
ADDQ DI, R9
|
||||
MOVQ $0xa2aa033b, R10
|
||||
IMULQ R10, R9
|
||||
ADDQ BX, R9
|
||||
RORQ $0x25, R9
|
||||
MOVQ $0xd6d018f5, R10
|
||||
IMULQ R10, R9
|
||||
XORQ R9, R8
|
||||
MOVQ BX, R9
|
||||
ADDQ DI, R9
|
||||
MOVQ $0xd6d018f5, R10
|
||||
IMULQ R10, R9
|
||||
ADDQ R8, R9
|
||||
RORQ $0x25, R9
|
||||
MOVQ $0xa2aa033b, R10
|
||||
IMULQ R10, R9
|
||||
XORQ R9, BX
|
||||
MOVQ SI, R9
|
||||
ADDQ R8, R9
|
||||
MOVQ $0xa2aa033b, R8
|
||||
IMULQ R8, R9
|
||||
ADDQ DI, R9
|
||||
RORQ $0x25, R9
|
||||
MOVQ $0xd6d018f5, DI
|
||||
IMULQ DI, R9
|
||||
XORQ R9, SI
|
||||
XORQ SI, BX
|
||||
ADDQ BX, AX
|
||||
|
||||
after32:
|
||||
CMPQ DX, $0x10
|
||||
JLT after16
|
||||
MOVQ (CX), BX
|
||||
MOVQ $0x62992fc1, SI
|
||||
IMULQ SI, BX
|
||||
ADDQ AX, BX
|
||||
ADDQ $0x08, CX
|
||||
SUBQ $0x08, DX
|
||||
RORQ $0x1d, BX
|
||||
MOVQ $0x30bc5b29, SI
|
||||
IMULQ SI, BX
|
||||
MOVQ (CX), SI
|
||||
MOVQ $0x62992fc1, DI
|
||||
IMULQ DI, SI
|
||||
ADDQ AX, SI
|
||||
ADDQ $0x08, CX
|
||||
SUBQ $0x08, DX
|
||||
RORQ $0x1d, SI
|
||||
MOVQ $0x30bc5b29, DI
|
||||
IMULQ DI, SI
|
||||
MOVQ BX, DI
|
||||
MOVQ $0xd6d018f5, R8
|
||||
IMULQ R8, DI
|
||||
RORQ $0x15, DI
|
||||
ADDQ SI, DI
|
||||
XORQ DI, BX
|
||||
MOVQ SI, DI
|
||||
MOVQ $0x30bc5b29, R8
|
||||
IMULQ R8, DI
|
||||
RORQ $0x15, DI
|
||||
ADDQ BX, DI
|
||||
XORQ DI, SI
|
||||
ADDQ SI, AX
|
||||
|
||||
after16:
|
||||
CMPQ DX, $0x08
|
||||
JLT after8
|
||||
MOVQ (CX), BX
|
||||
MOVQ $0x30bc5b29, SI
|
||||
IMULQ SI, BX
|
||||
ADDQ BX, AX
|
||||
ADDQ $0x08, CX
|
||||
SUBQ $0x08, DX
|
||||
MOVQ AX, BX
|
||||
RORQ $0x37, BX
|
||||
MOVQ $0xa2aa033b, SI
|
||||
IMULQ SI, BX
|
||||
XORQ BX, AX
|
||||
|
||||
after8:
|
||||
CMPQ DX, $0x04
|
||||
JLT after4
|
||||
XORQ BX, BX
|
||||
MOVL (CX), BX
|
||||
MOVQ $0x30bc5b29, SI
|
||||
IMULQ SI, BX
|
||||
ADDQ BX, AX
|
||||
ADDQ $0x04, CX
|
||||
SUBQ $0x04, DX
|
||||
MOVQ AX, BX
|
||||
RORQ $0x1a, BX
|
||||
MOVQ $0xa2aa033b, SI
|
||||
IMULQ SI, BX
|
||||
XORQ BX, AX
|
||||
|
||||
after4:
|
||||
CMPQ DX, $0x02
|
||||
JLT after2
|
||||
XORQ BX, BX
|
||||
MOVW (CX), BX
|
||||
MOVQ $0x30bc5b29, SI
|
||||
IMULQ SI, BX
|
||||
ADDQ BX, AX
|
||||
ADDQ $0x02, CX
|
||||
SUBQ $0x02, DX
|
||||
MOVQ AX, BX
|
||||
RORQ $0x30, BX
|
||||
MOVQ $0xa2aa033b, SI
|
||||
IMULQ SI, BX
|
||||
XORQ BX, AX
|
||||
|
||||
after2:
|
||||
CMPQ DX, $0x01
|
||||
JLT after1
|
||||
MOVBQZX (CX), CX
|
||||
MOVQ $0x30bc5b29, DX
|
||||
IMULQ DX, CX
|
||||
ADDQ CX, AX
|
||||
MOVQ AX, CX
|
||||
RORQ $0x25, CX
|
||||
MOVQ $0xa2aa033b, DX
|
||||
IMULQ DX, CX
|
||||
XORQ CX, AX
|
||||
|
||||
after1:
|
||||
MOVQ AX, CX
|
||||
RORQ $0x1c, CX
|
||||
XORQ CX, AX
|
||||
MOVQ $0xd6d018f5, CX
|
||||
IMULQ CX, AX
|
||||
MOVQ AX, CX
|
||||
RORQ $0x1d, CX
|
||||
XORQ CX, AX
|
||||
MOVQ AX, ret+24(FP)
|
||||
RET
|
||||
10
vendor/github.com/dgryski/go-metro/metro_stub.go
generated
vendored
Normal file
10
vendor/github.com/dgryski/go-metro/metro_stub.go
generated
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
// Code generated by command: go run asm.go -out metro_amd64.s -stubs metro_stub.go -pkg metro. DO NOT EDIT.
|
||||
|
||||
//go:build amd64 && gc && !purego && !noasm
|
||||
|
||||
package metro
|
||||
|
||||
//go:noescape
|
||||
func Hash64(buffer []byte, seed uint64) uint64
|
||||
|
||||
func Hash64Str(buffer string, seed uint64) uint64
|
||||
1
vendor/github.com/kamstrup/intmap/.gitignore
generated
vendored
Normal file
1
vendor/github.com/kamstrup/intmap/.gitignore
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
*.swp
|
||||
23
vendor/github.com/kamstrup/intmap/LICENSE
generated
vendored
Normal file
23
vendor/github.com/kamstrup/intmap/LICENSE
generated
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
Copyright (c) 2016, Brent Pedersen - Bioinformatics
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
52
vendor/github.com/kamstrup/intmap/README.md
generated
vendored
Normal file
52
vendor/github.com/kamstrup/intmap/README.md
generated
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
Fast hashmap with integer keys for Golang
|
||||
|
||||
[GoDoc](https://godoc.org/github.com/kamstrup/intmap)
|
||||
[Go Report Card](https://goreportcard.com/report/github.com/kamstrup/intmap)
|
||||
|
||||
# intmap
|
||||
|
||||
import "github.com/kamstrup/intmap"
|
||||
|
||||
Package intmap is a fast hashmap implementation for Golang, specialized for maps with integer type keys.
|
||||
The values can be of any type.
|
||||
|
||||
It is a full port of https://github.com/brentp/intintmap to use type parameters (aka generics).
|
||||
|
||||
It interleaves keys and values in the same underlying array to improve locality.
|
||||
This is also known as open addressing with linear probing.
|
||||
|
||||
It is up to 3X faster than the builtin map:
|
||||
```
|
||||
name time/op
|
||||
Map64Fill-8 201ms ± 5%
|
||||
IntIntMapFill-8 207ms ±31%
|
||||
StdMapFill-8 371ms ±11%
|
||||
Map64Get10PercentHitRate-8 148µs ±40%
|
||||
IntIntMapGet10PercentHitRate-8 171µs ±50%
|
||||
StdMapGet10PercentHitRate-8 171µs ±33%
|
||||
Map64Get100PercentHitRate-8 4.50ms ± 5%
|
||||
IntIntMapGet100PercentHitRate-8 4.82ms ± 6%
|
||||
StdMapGet100PercentHitRate-8 15.5ms ±32%
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```go
|
||||
m := intmap.New[int64,int64](32768)
|
||||
m.Put(int64(1234), int64(-222))
|
||||
m.Put(int64(123), int64(33))
|
||||
|
||||
v, ok := m.Get(int64(222))
|
||||
v, ok = m.Get(int64(333))
|
||||
|
||||
m.Del(int64(222))
|
||||
m.Del(int64(333))
|
||||
|
||||
fmt.Println(m.Len())
|
||||
|
||||
m.ForEach(func(k int64, v int64) {
|
||||
fmt.Printf("key: %d, value: %d\n", k, v)
|
||||
})
|
||||
|
||||
m.Clear() // all gone, but buffers kept
|
||||
```
|
||||
458
vendor/github.com/kamstrup/intmap/map64.go
generated
vendored
Normal file
458
vendor/github.com/kamstrup/intmap/map64.go
generated
vendored
Normal file
@@ -0,0 +1,458 @@
|
||||
// Package intmap contains a fast hashmap implementation for maps with keys of any integer type
|
||||
package intmap
|
||||
|
||||
import (
|
||||
"iter"
|
||||
"math"
|
||||
)
|
||||
|
||||
// IntKey is the type constraint for keys of a Map. It is satisfied by any type
// whose underlying type is one of the built-in signed or unsigned integer types,
// or uintptr.
|
||||
type IntKey interface {
|
||||
~int | ~uint | ~int64 | ~uint64 | ~int32 | ~uint32 | ~int16 | ~uint16 | ~int8 | ~uint8 | ~uintptr
|
||||
}
|
||||
|
||||
// pair is one slot of a Map: a key together with the value stored for it.
|
||||
//
|
||||
// The field order is deliberate: V comes before K in the memory layout, even though K first would feel more natural.
|
||||
// The goal is sizeof(pair[K,struct{}]) == sizeof(K), to minimize memory consumption when using a Set.
|
||||
// If V were the last field, &p.V could point to invalid memory, which is not permitted, so the Go compiler
|
||||
// would emit some padding for the pair struct in that case.
|
||||
// See https://github.com/kamstrup/intmap/pull/6#issuecomment-3581008879
|
||||
type pair[K IntKey, V any] struct {
|
||||
V V // value stored for K
|
||||
K K // key of the slot
|
||||
}
|
||||
|
||||
// Load factor of the table, kept both as an integer number of tenths (for
// integer threshold arithmetic) and as the equivalent fraction (for sizing).
const (
	fillFactorBase64 = 7
	fillFactor64     = fillFactorBase64 / 10.0
)
|
||||
|
||||
// phiMix64 scrambles x into a hash value: it multiplies x by the constant
// 0x9E3779B9 and then folds the upper bits into the lower ones with a
// shift-and-xor step.
func phiMix64(x int) int {
	const multiplier int64 = 0x9E3779B9

	product := int64(x) * multiplier
	folded := product >> 16
	return int(product ^ folded)
}
|
||||
|
||||
// Map is a hashmap whose keys are of any integer type.
|
||||
// It is valid to call methods that read a nil map, similar to a standard Go map.
|
||||
// Methods valid on a nil map are Has, Get, Len, and ForEach, as well as the
// iterators returned by All, Keys, and Values.
|
||||
type Map[K IntKey, V any] struct {
|
||||
data []pair[K, V] // key-value pairs; a slot whose key is zero is empty
|
||||
size int // number of stored entries, including the zero key when present
|
||||
|
||||
zeroVal V // value of 'zero' key
|
||||
hasZeroKey bool // do we have 'zero' key in the map?
|
||||
}
|
||||
|
||||
// New creates a new map with keys being any integer subtype.
|
||||
// The map can store up to the given capacity before reallocation and rehashing occurs.
|
||||
func New[K IntKey, V any](capacity int) *Map[K, V] {
|
||||
return &Map[K, V]{
|
||||
data: make([]pair[K, V], arraySize(capacity, fillFactor64)),
|
||||
}
|
||||
}
|
||||
|
||||
// Has checks if the given key exists in the map.
|
||||
// Calling this method on a nil map will return false.
|
||||
func (m *Map[K, V]) Has(key K) bool {
|
||||
if m == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if key == K(0) {
|
||||
return m.hasZeroKey
|
||||
}
|
||||
|
||||
idx := m.startIndex(key)
|
||||
p := m.data[idx]
|
||||
|
||||
if p.K == K(0) { // end of chain already
|
||||
return false
|
||||
}
|
||||
if p.K == key { // we check zero prior to this call
|
||||
return true
|
||||
}
|
||||
|
||||
// hash collision, seek next hash match, bailing on first empty
|
||||
for {
|
||||
idx = m.nextIndex(idx)
|
||||
p = m.data[idx]
|
||||
if p.K == K(0) {
|
||||
return false
|
||||
}
|
||||
if p.K == key {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get returns the value if the key is found.
|
||||
// If you just need to check for existence it is easier to use Has.
|
||||
// Calling this method on a nil map will return the zero value for V and false.
|
||||
func (m *Map[K, V]) Get(key K) (V, bool) {
|
||||
if m == nil {
|
||||
var zero V
|
||||
return zero, false
|
||||
}
|
||||
|
||||
if key == K(0) {
|
||||
if m.hasZeroKey {
|
||||
return m.zeroVal, true
|
||||
}
|
||||
var zero V
|
||||
return zero, false
|
||||
}
|
||||
|
||||
idx := m.startIndex(key)
|
||||
p := m.data[idx]
|
||||
|
||||
if p.K == K(0) { // end of chain already
|
||||
var zero V
|
||||
return zero, false
|
||||
}
|
||||
if p.K == key { // we check zero prior to this call
|
||||
return p.V, true
|
||||
}
|
||||
|
||||
// hash collision, seek next hash match, bailing on first empty
|
||||
for {
|
||||
idx = m.nextIndex(idx)
|
||||
p = m.data[idx]
|
||||
if p.K == K(0) {
|
||||
var zero V
|
||||
return zero, false
|
||||
}
|
||||
if p.K == key {
|
||||
return p.V, true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Put adds or updates key with value val.
|
||||
func (m *Map[K, V]) Put(key K, val V) {
|
||||
if key == K(0) {
|
||||
if !m.hasZeroKey {
|
||||
m.size++
|
||||
}
|
||||
m.zeroVal = val
|
||||
m.hasZeroKey = true
|
||||
return
|
||||
}
|
||||
|
||||
idx := m.startIndex(key)
|
||||
p := &m.data[idx]
|
||||
|
||||
if p.K == K(0) { // end of chain already
|
||||
p.K = key
|
||||
p.V = val
|
||||
if m.size >= m.sizeThreshold() {
|
||||
m.rehash()
|
||||
} else {
|
||||
m.size++
|
||||
}
|
||||
return
|
||||
} else if p.K == key { // overwrite existing value
|
||||
p.V = val
|
||||
return
|
||||
}
|
||||
|
||||
// hash collision, seek next empty or key match
|
||||
for {
|
||||
idx = m.nextIndex(idx)
|
||||
p = &m.data[idx]
|
||||
|
||||
if p.K == K(0) {
|
||||
p.K = key
|
||||
p.V = val
|
||||
if m.size >= m.sizeThreshold() {
|
||||
m.rehash()
|
||||
} else {
|
||||
m.size++
|
||||
}
|
||||
return
|
||||
} else if p.K == key {
|
||||
p.V = val
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// PutIfNotExists adds the key-value pair only if the key does not already exist
|
||||
// in the map, and returns the current value associated with the key and a boolean
|
||||
// indicating whether the value was newly added or not.
|
||||
func (m *Map[K, V]) PutIfNotExists(key K, val V) (V, bool) {
|
||||
if key == K(0) {
|
||||
if m.hasZeroKey {
|
||||
return m.zeroVal, false
|
||||
}
|
||||
m.zeroVal = val
|
||||
m.hasZeroKey = true
|
||||
m.size++
|
||||
return val, true
|
||||
}
|
||||
|
||||
idx := m.startIndex(key)
|
||||
p := &m.data[idx]
|
||||
|
||||
if p.K == K(0) { // end of chain already
|
||||
p.K = key
|
||||
p.V = val
|
||||
m.size++
|
||||
if m.size >= m.sizeThreshold() {
|
||||
m.rehash()
|
||||
}
|
||||
return val, true
|
||||
} else if p.K == key {
|
||||
return p.V, false
|
||||
}
|
||||
|
||||
// hash collision, seek next hash match, bailing on first empty
|
||||
for {
|
||||
idx = m.nextIndex(idx)
|
||||
p = &m.data[idx]
|
||||
|
||||
if p.K == K(0) {
|
||||
p.K = key
|
||||
p.V = val
|
||||
m.size++
|
||||
if m.size >= m.sizeThreshold() {
|
||||
m.rehash()
|
||||
}
|
||||
return val, true
|
||||
} else if p.K == key {
|
||||
return p.V, false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ForEach iterates through key-value pairs in the map while the function f returns true.
|
||||
// This method returns immediately if invoked on a nil map.
|
||||
//
|
||||
// The iteration order of a Map is not defined, so please avoid relying on it.
|
||||
func (m *Map[K, V]) ForEach(f func(K, V) bool) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if m.hasZeroKey && !f(K(0), m.zeroVal) {
|
||||
return
|
||||
}
|
||||
forEach64(m.data, f)
|
||||
}
|
||||
|
||||
// All returns an iterator over key-value pairs from m.
|
||||
// The iterator returns immediately if invoked on a nil map.
|
||||
//
|
||||
// The iteration order of a Map is not defined, so please avoid relying on it.
|
||||
func (m *Map[K, V]) All() iter.Seq2[K, V] {
|
||||
return m.ForEach
|
||||
}
|
||||
|
||||
// Keys returns an iterator over keys in m.
|
||||
// The iterator returns immediately if invoked on a nil map.
|
||||
//
|
||||
// The iteration order of a Map is not defined, so please avoid relying on it.
|
||||
func (m *Map[K, V]) Keys() iter.Seq[K] {
|
||||
return func(yield func(k K) bool) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if m.hasZeroKey && !yield(K(0)) {
|
||||
return
|
||||
}
|
||||
|
||||
for _, p := range m.data {
|
||||
if p.K != K(0) && !yield(p.K) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Values returns an iterator over values in m.
|
||||
// The iterator returns immediately if invoked on a nil map.
|
||||
//
|
||||
// The iteration order of a Map is not defined, so please avoid relying on it.
|
||||
func (m *Map[K, V]) Values() iter.Seq[V] {
|
||||
return func(yield func(v V) bool) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if m.hasZeroKey && !yield(m.zeroVal) {
|
||||
return
|
||||
}
|
||||
|
||||
for _, p := range m.data {
|
||||
if p.K != K(0) && !yield(p.V) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Clear removes all items from the map, but keeps the internal buffers for reuse.
|
||||
func (m *Map[K, V]) Clear() {
|
||||
var zero V
|
||||
m.hasZeroKey = false
|
||||
m.zeroVal = zero
|
||||
|
||||
// compiles down to runtime.memclr()
|
||||
for i := range m.data {
|
||||
m.data[i] = pair[K, V]{}
|
||||
}
|
||||
|
||||
m.size = 0
|
||||
}
|
||||
|
||||
func (m *Map[K, V]) rehash() {
|
||||
oldData := m.data
|
||||
m.data = make([]pair[K, V], 2*len(m.data))
|
||||
|
||||
// reset size
|
||||
if m.hasZeroKey {
|
||||
m.size = 1
|
||||
} else {
|
||||
m.size = 0
|
||||
}
|
||||
|
||||
forEach64(oldData, func(k K, v V) bool {
|
||||
m.Put(k, v)
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
// Len returns the number of elements in the map.
|
||||
// The length of a nil map is defined to be zero.
|
||||
func (m *Map[K, V]) Len() int {
|
||||
if m == nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
return m.size
|
||||
}
|
||||
|
||||
func (m *Map[K, V]) sizeThreshold() int {
|
||||
return int(uint64(len(m.data)) * fillFactorBase64 / 10)
|
||||
}
|
||||
|
||||
func (m *Map[K, V]) startIndex(key K) int {
|
||||
return startIndex(int(key), len(m.data))
|
||||
}
|
||||
|
||||
func (m *Map[K, V]) nextIndex(idx int) int {
|
||||
return nextIndex(idx, len(m.data))
|
||||
}
|
||||
|
||||
func forEach64[K IntKey, V any](pairs []pair[K, V], f func(k K, v V) bool) {
|
||||
for _, p := range pairs {
|
||||
if p.K != K(0) && !f(p.K, p.V) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Del deletes a key and its value, returning true iff the key was found
|
||||
func (m *Map[K, V]) Del(key K) bool {
|
||||
if key == K(0) {
|
||||
if m.hasZeroKey {
|
||||
m.hasZeroKey = false
|
||||
m.size--
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
idx := m.startIndex(key)
|
||||
p := m.data[idx]
|
||||
|
||||
if p.K == key {
|
||||
// any keys that were pushed back needs to be shifted nack into the empty slot
|
||||
// to avoid breaking the chain
|
||||
m.shiftKeys(idx)
|
||||
m.size--
|
||||
return true
|
||||
} else if p.K == K(0) { // end of chain already
|
||||
return false
|
||||
}
|
||||
|
||||
for {
|
||||
idx = m.nextIndex(idx)
|
||||
p = m.data[idx]
|
||||
|
||||
if p.K == key {
|
||||
// any keys that were pushed back needs to be shifted nack into the empty slot
|
||||
// to avoid breaking the chain
|
||||
m.shiftKeys(idx)
|
||||
m.size--
|
||||
return true
|
||||
} else if p.K == K(0) {
|
||||
return false
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
func (m *Map[K, V]) shiftKeys(idx int) int {
|
||||
// Shift entries with the same hash.
|
||||
// We need to do this on deletion to ensure we don't have zeroes in the hash chain
|
||||
for {
|
||||
var p pair[K, V]
|
||||
lastIdx := idx
|
||||
idx = m.nextIndex(idx)
|
||||
for {
|
||||
p = m.data[idx]
|
||||
if p.K == K(0) {
|
||||
m.data[lastIdx] = pair[K, V]{}
|
||||
return lastIdx
|
||||
}
|
||||
|
||||
slot := m.startIndex(p.K)
|
||||
if lastIdx <= idx {
|
||||
if lastIdx >= slot || slot > idx {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
if lastIdx >= slot && slot > idx {
|
||||
break
|
||||
}
|
||||
}
|
||||
idx = m.nextIndex(idx)
|
||||
}
|
||||
m.data[lastIdx] = p
|
||||
}
|
||||
}
|
||||
|
||||
// nextPowerOf2 rounds x up to a power of two; powers of two are returned
// unchanged. Zero yields 1 and math.MaxUint32 is returned as is.
func nextPowerOf2(x uint32) uint32 {
	switch x {
	case math.MaxUint32:
		return x
	case 0:
		return 1
	}

	// Smear the highest set bit of x-1 into every lower position, then add
	// one to land on the power of two.
	x--
	for shift := uint(1); shift <= 16; shift *= 2 {
		x |= x >> shift
	}
	return x + 1
}
|
||||
|
||||
func arraySize(exp int, fill float64) int {
|
||||
s := nextPowerOf2(uint32(math.Ceil(float64(exp) / fill)))
|
||||
if s < 2 {
|
||||
s = 2
|
||||
}
|
||||
return int(s)
|
||||
}
|
||||
|
||||
func startIndex(key, len int) int {
|
||||
return phiMix64(key) & (len - 1)
|
||||
}
|
||||
|
||||
// nextIndex returns idx+1 masked with len-1, which wraps around to zero at the
// end of a table whose length is a power of two.
func nextIndex(idx, len int) int {
	mask := len - 1
	return (idx + 1) & mask
}
|
||||
59
vendor/github.com/kamstrup/intmap/set.go
generated
vendored
Normal file
59
vendor/github.com/kamstrup/intmap/set.go
generated
vendored
Normal file
@@ -0,0 +1,59 @@
|
||||
package intmap
|
||||
|
||||
import "iter"
|
||||
|
||||
// Set is a specialization of Map modelling a set of integers: it is a Map
// whose values are the empty struct.
|
||||
// Like Map, methods that read from the set are valid on the nil Set.
|
||||
// This includes Has, Len, and ForEach, as well as the iterator returned by All.
|
||||
type Set[K IntKey] Map[K, struct{}]
|
||||
|
||||
// NewSet creates a new Set with a given initial capacity.
|
||||
func NewSet[K IntKey](capacity int) *Set[K] {
|
||||
return (*Set[K])(New[K, struct{}](capacity))
|
||||
}
|
||||
|
||||
// Add an element to the set. Returns true if the element was not already present.
|
||||
func (s *Set[K]) Add(k K) bool {
|
||||
_, found := (*Map[K, struct{}])(s).PutIfNotExists(k, struct{}{})
|
||||
return found
|
||||
}
|
||||
|
||||
// Del deletes a key, returning true iff the key was found
|
||||
func (s *Set[K]) Del(k K) bool {
|
||||
return (*Map[K, struct{}])(s).Del(k)
|
||||
}
|
||||
|
||||
// Clear removes all items from the Set, but keeps the internal buffers for reuse.
|
||||
func (s *Set[K]) Clear() {
|
||||
(*Map[K, struct{}])(s).Clear()
|
||||
}
|
||||
|
||||
// Has returns true if the key is in the set.
|
||||
// If the set is nil this method always return false.
|
||||
func (s *Set[K]) Has(k K) bool {
|
||||
return (*Map[K, struct{}])(s).Has(k)
|
||||
}
|
||||
|
||||
// Len returns the number of elements in the set.
|
||||
// If the set is nil this method return 0.
|
||||
func (s *Set[K]) Len() int {
|
||||
return (*Map[K, struct{}])(s).Len()
|
||||
}
|
||||
|
||||
// ForEach iterates over the elements in the set while the visit function returns true.
|
||||
// This method returns immediately if the set is nil.
|
||||
//
|
||||
// The iteration order of a Set is not defined, so please avoid relying on it.
|
||||
func (s *Set[K]) ForEach(visit func(k K) bool) {
|
||||
(*Map[K, struct{}])(s).ForEach(func(k K, _ struct{}) bool {
|
||||
return visit(k)
|
||||
})
|
||||
}
|
||||
|
||||
// All returns an iterator over keys from the set.
|
||||
// The iterator returns immediately if the set is nil.
|
||||
//
|
||||
// The iteration order of a Set is not defined, so please avoid relying on it.
|
||||
func (s *Set[K]) All() iter.Seq[K] {
|
||||
return s.ForEach
|
||||
}
|
||||
10
vendor/modules.txt
vendored
10
vendor/modules.txt
vendored
@@ -292,6 +292,9 @@ github.com/aws/smithy-go/traits
|
||||
github.com/aws/smithy-go/transport/http
|
||||
github.com/aws/smithy-go/transport/http/internal/io
|
||||
github.com/aws/smithy-go/waiter
|
||||
# github.com/axiomhq/hyperloglog v0.0.0-00010101000000-000000000000 => github.com/makasim/hyperloglog v0.0.10-reuse-memory
|
||||
## explicit; go 1.23
|
||||
github.com/axiomhq/hyperloglog
|
||||
# github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3
|
||||
## explicit; go 1.20
|
||||
github.com/bboreham/go-loser
|
||||
@@ -333,6 +336,9 @@ github.com/davecgh/go-spew/spew
|
||||
# github.com/dennwc/varint v1.0.0
|
||||
## explicit; go 1.12
|
||||
github.com/dennwc/varint
|
||||
# github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33
|
||||
## explicit
|
||||
github.com/dgryski/go-metro
|
||||
# github.com/envoyproxy/go-control-plane/envoy v1.37.0
|
||||
## explicit; go 1.24.0
|
||||
github.com/envoyproxy/go-control-plane/envoy/admin/v3
|
||||
@@ -492,6 +498,9 @@ github.com/jpillora/backoff
|
||||
# github.com/json-iterator/go v1.1.12
|
||||
## explicit; go 1.12
|
||||
github.com/json-iterator/go
|
||||
# github.com/kamstrup/intmap v0.5.2
|
||||
## explicit; go 1.23
|
||||
github.com/kamstrup/intmap
|
||||
# github.com/klauspost/compress v1.18.6
|
||||
## explicit; go 1.24
|
||||
github.com/klauspost/compress
|
||||
@@ -1264,3 +1273,4 @@ sigs.k8s.io/structured-merge-diff/v6/value
|
||||
# sigs.k8s.io/yaml v1.6.0
|
||||
## explicit; go 1.22
|
||||
sigs.k8s.io/yaml
|
||||
# github.com/axiomhq/hyperloglog => github.com/makasim/hyperloglog v0.0.10-reuse-memory
|
||||
|
||||
Reference in New Issue
Block a user