Files
VictoriaMetrics/app/vmstorage/servers/vmselect.go
Artem Fetishev f5d028d0e3 lib/storage: copy vmselect server from vmcluster to vmsingle
Work in progress: compiles but panics due to duplicate flags.

Signed-off-by: Artem Fetishev <rtm@victoriametrics.com>
2026-05-13 16:28:11 +02:00

363 lines
15 KiB
Go

package servers
import (
"flag"
"fmt"
"net/http"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricnamestats"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/vmselectapi"
)
// Command-line flags controlling the vmselect API limits served by vmstorage.
//
// NOTE(review): per the commit message, these flag names currently duplicate
// flags registered elsewhere in vmsingle and cause a panic at startup —
// TODO confirm and deduplicate before merging.
var (
maxUniqueTimeseries = flag.Int("search.maxUniqueTimeseries", 0, "The maximum number of unique time series, which can be scanned during every query. "+
"This allows protecting against heavy queries, which select unexpectedly high number of series. When set to zero, the limit is automatically calculated based on -search.maxConcurrentRequests (inversely proportional) and memory available to the process (proportional). See also -search.max* command-line flags at vmselect")
maxTagKeys = flag.Int("search.maxTagKeys", 100e3, "The maximum number of tag keys returned per search. "+
"See also -search.maxLabelsAPISeries and -search.maxLabelsAPIDuration")
maxTagValues = flag.Int("search.maxTagValues", 100e3, "The maximum number of tag values returned per search. "+
"See also -search.maxLabelsAPISeries and -search.maxLabelsAPIDuration")
maxTagValueSuffixesPerSearch = flag.Int("search.maxTagValueSuffixesPerSearch", 100e3, "The maximum number of tag value suffixes returned from /metrics/find")
maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", 2*cgroup.AvailableCPUs(), "The maximum number of concurrent vmselect requests "+
"the vmstorage can process at -vmselectAddr. It shouldn't be high, since a single request usually saturates a CPU core, and many concurrently executed requests "+
"may require high amounts of memory. See also -search.maxQueueDuration")
maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the incoming vmselect request waits for execution "+
"when -search.maxConcurrentRequests limit is reached")
disableRPCCompression = flag.Bool("rpc.disableCompression", false, "Whether to disable compression of the data sent from vmstorage to vmselect. "+
"This reduces CPU usage at the cost of higher network bandwidth usage")
denyQueriesOutsideRetention = flag.Bool("denyQueriesOutsideRetention", false, "Whether to deny queries outside of the configured -retentionPeriod. "+
"When set, then /api/v1/query_range would return '503 Service Unavailable' error for queries with 'from' value outside -retentionPeriod. "+
"This may be useful when multiple data sources with distinct retentions are hidden behind query-tee")
)
var (
// maxUniqueTimeseriesValue caches the effective -search.maxUniqueTimeseries
// limit computed by GetMaxUniqueTimeSeries.
maxUniqueTimeseriesValue int
// maxUniqueTimeseriesValueOnce guards the one-time initialization of
// maxUniqueTimeseriesValue.
maxUniqueTimeseriesValueOnce sync.Once
)
// NewVMSelectServer starts a new server at the given addr, which serves vmselect
// requests from the given s. The request limits are taken from the
// search.max* command-line flags.
func NewVMSelectServer(addr string, s *storage.Storage) (*vmselectapi.Server, error) {
	limits := vmselectapi.Limits{
		MaxLabelNames:                 *maxTagKeys,
		MaxLabelValues:                *maxTagValues,
		MaxTagValueSuffixes:           *maxTagValueSuffixesPerSearch,
		MaxConcurrentRequests:         *maxConcurrentRequests,
		MaxConcurrentRequestsFlagName: "search.maxConcurrentRequests",
		MaxQueueDuration:              *maxQueueDuration,
		MaxQueueDurationFlagName:      "search.maxQueueDuration",
	}
	return vmselectapi.NewServer(addr, &vmstorageAPI{s: s}, limits, *disableRPCCompression)
}
// vmstorageAPI implements vmselectapi.API on top of a storage.Storage instance.
type vmstorageAPI struct {
s *storage.Storage
}
// InitSearch starts a search for the series matching sq and returns an
// iterator over the matching metric blocks. The caller must close the
// returned iterator via MustClose when done.
func (api *vmstorageAPI) InitSearch(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) (vmselectapi.BlockIterator, error) {
	// TODO(rtm0): Return empty result if sq.AccountID, sq.ProjectID do not
	// match tenantID from flag and sq is not multitenant.
	tr := sq.GetTimeRange()
	if err := checkTimeRange(api.s, tr); err != nil {
		return nil, err
	}
	maxMetrics := getMaxMetrics(sq.MaxMetrics)
	tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
	switch {
	case err != nil:
		return nil, err
	case len(tfss) == 0:
		return nil, fmt.Errorf("missing tag filters")
	}
	it := getBlockIterator()
	it.sr.Init(qt, api.s, tfss, tr, maxMetrics, deadline)
	if err := it.sr.Error(); err != nil {
		// Release the iterator back to the pool on init failure.
		it.MustClose()
		return nil, err
	}
	return it, nil
}
// SearchMetricNames returns the names of the metrics matching sq.
func (api *vmstorageAPI) SearchMetricNames(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) ([]string, error) {
	// TODO(rtm0): Return empty result if sq.AccountID, sq.ProjectID do not
	// match tenantID from flag and sq is not multitenant.
	tr := sq.GetTimeRange()
	maxMetrics := sq.MaxMetrics
	if maxMetrics <= 0 {
		// The query carries no explicit limit - fall back to the global one.
		// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
		maxMetrics = GetMaxUniqueTimeSeries()
	}
	tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
	switch {
	case err != nil:
		return nil, err
	case len(tfss) == 0:
		return nil, fmt.Errorf("missing tag filters")
	}
	return api.s.SearchMetricNames(qt, tfss, tr, maxMetrics, deadline)
}
// LabelValues returns up to maxLabelValues values of the label labelName
// across the series matching sq.
func (api *vmstorageAPI) LabelValues(qt *querytracer.Tracer, sq *storage.SearchQuery, labelName string, maxLabelValues int, deadline uint64) ([]string, error) {
	// TODO(rtm0): Return empty result if sq.AccountID, sq.ProjectID do not
	// match tenantID from flag and sq is not multitenant.
	tr := sq.GetTimeRange()
	maxMetrics := sq.MaxMetrics
	if maxMetrics <= 0 {
		// The query carries no explicit limit - fall back to the global one.
		// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
		maxMetrics = GetMaxUniqueTimeSeries()
	}
	tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
	if err != nil {
		return nil, err
	}
	return api.s.SearchLabelValues(qt, labelName, tfss, tr, maxLabelValues, maxMetrics, deadline)
}
// TagValueSuffixes returns the tag value suffixes for the given tagKey and
// tagValuePrefix split by delimiter (used by Graphite's /metrics/find).
// It fails when the number of suffixes reaches maxSuffixes.
func (api *vmstorageAPI) TagValueSuffixes(qt *querytracer.Tracer, accountID, projectID uint32, tr storage.TimeRange, tagKey, tagValuePrefix string, delimiter byte,
	maxSuffixes int, deadline uint64) ([]string, error) {
	// TODO(rtm0): Return empty result if accountID, projectID do not match
	// tenantID from flag.
	suffixes, err := api.s.SearchTagValueSuffixes(qt, tr, tagKey, tagValuePrefix, delimiter, maxSuffixes, deadline)
	if err != nil {
		return nil, err
	}
	if len(suffixes) < maxSuffixes {
		return suffixes, nil
	}
	return nil, fmt.Errorf("more than -search.maxTagValueSuffixesPerSearch=%d suffixes returned; "+
		"either narrow down the search or increase -search.maxTagValueSuffixesPerSearch command-line flag value", maxSuffixes)
}
// LabelNames returns up to maxLabelNames label names present on the series
// matching sq.
func (api *vmstorageAPI) LabelNames(qt *querytracer.Tracer, sq *storage.SearchQuery, maxLabelNames int, deadline uint64) ([]string, error) {
	// TODO(rtm0): Return empty result if sq.AccountID, sq.ProjectID do not
	// match tenantID from flag and sq is not multitenant.
	tr := sq.GetTimeRange()
	maxMetrics := sq.MaxMetrics
	if maxMetrics <= 0 {
		// The query carries no explicit limit - fall back to the global one.
		// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
		maxMetrics = GetMaxUniqueTimeSeries()
	}
	tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
	if err != nil {
		return nil, err
	}
	return api.s.SearchLabelNames(qt, tfss, tr, maxLabelNames, maxMetrics, deadline)
}
// SeriesCount returns the total number of series in the storage.
func (api *vmstorageAPI) SeriesCount(_ *querytracer.Tracer, accountID, projectID uint32, deadline uint64) (uint64, error) {
	// TODO(rtm0): Return 0 if accountID, projectID do not match tenantID
	// from flag.
	return api.s.GetSeriesCount(deadline)
}
// Tenants returns the list of tenants. vmsingle serves a single hard-coded
// tenant "0:0" for now.
func (api *vmstorageAPI) Tenants(qt *querytracer.Tracer, tr storage.TimeRange, deadline uint64) ([]string, error) {
	// TODO(rtm0): Return the tenantID from flag.
	// return api.s.SearchTenants(qt, tr, deadline)
	return []string{"0:0"}, nil
}
// TSDBStatus returns TSDB status (cardinality statistics) for the series
// matching sq, focused on focusLabel and limited to topN entries.
func (api *vmstorageAPI) TSDBStatus(qt *querytracer.Tracer, sq *storage.SearchQuery, focusLabel string, topN int, deadline uint64) (*storage.TSDBStatus, error) {
	// TODO(rtm0): Return empty result if sq.AccountID, sq.ProjectID do not
	// match tenantID from flag and sq is not multitenant.
	const msecsPerDay = 24 * 3600 * 1000
	tr := sq.GetTimeRange()
	maxMetrics := sq.MaxMetrics
	if maxMetrics <= 0 {
		// The query carries no explicit limit - fall back to the global one.
		// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
		maxMetrics = GetMaxUniqueTimeSeries()
	}
	tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
	if err != nil {
		return nil, err
	}
	// The status is collected per day; derive the day index from the query
	// start timestamp (milliseconds).
	date := uint64(sq.MinTimestamp) / msecsPerDay
	return api.s.GetTSDBStatus(qt, tfss, date, focusLabel, topN, maxMetrics, deadline)
}
// DeleteSeries deletes the series matching sq and returns the number of
// deleted series.
func (api *vmstorageAPI) DeleteSeries(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) (int, error) {
	// TODO(rtm0): Return empty result if sq.AccountID, sq.ProjectID do not
	// match tenantID from flag and sq is not multitenant.
	tr := sq.GetTimeRange()
	maxMetrics := sq.MaxMetrics
	if maxMetrics <= 0 {
		// The query carries no explicit limit - fall back to the global one.
		// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
		maxMetrics = GetMaxUniqueTimeSeries()
	}
	tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
	switch {
	case err != nil:
		return 0, err
	case len(tfss) == 0:
		return 0, fmt.Errorf("missing tag filters")
	}
	return api.s.DeleteSeries(qt, tfss, maxMetrics)
}
// RegisterMetricNames is part of vmselectapi.API but is not supported by this
// server; it always returns an error.
func (api *vmstorageAPI) RegisterMetricNames(qt *querytracer.Tracer, mrs []storage.MetricRow, _ uint64) error {
	return fmt.Errorf("not implemented")
}
// GetMetricNamesUsageStats returns metric-name usage statistics limited by
// limit/le and filtered by matchPattern.
func (api *vmstorageAPI) GetMetricNamesUsageStats(qt *querytracer.Tracer, tt *storage.TenantToken, limit, le int, matchPattern string, _ uint64) (metricnamestats.StatsResult, error) {
	// TODO(rtm0): Return empty result if tt do not match tenantID from flag.
	return api.s.GetMetricNamesStats(qt, limit, le, matchPattern), nil
}
// ResetMetricNamesUsageStats clears the collected metric-name usage
// statistics.
func (api *vmstorageAPI) ResetMetricNamesUsageStats(qt *querytracer.Tracer, _ uint64) error {
	api.s.ResetMetricNamesStats(qt)
	return nil
}
// setupTfss converts the tag filter groups from sq into storage.TagFilters.
// Filters on the special "__graphite__" pseudo-label are expanded into the
// matching Graphite paths (bounded by maxMetrics) before being added.
func (api *vmstorageAPI) setupTfss(qt *querytracer.Tracer, sq *storage.SearchQuery, tr storage.TimeRange, maxMetrics int, deadline uint64) ([]*storage.TagFilters, error) {
	tfss := make([]*storage.TagFilters, 0, len(sq.TagFilterss))
	for _, tagFilters := range sq.TagFilterss {
		tfs := storage.NewTagFilters()
		for i := range tagFilters {
			tf := &tagFilters[i]
			if string(tf.Key) != "__graphite__" {
				// Ordinary tag filter.
				if err := tfs.Add(tf.Key, tf.Value, tf.IsNegative, tf.IsRegexp); err != nil {
					return nil, fmt.Errorf("cannot parse tag filter %s: %w", tf, err)
				}
				continue
			}
			// Expand the Graphite query into the set of matching paths.
			query := tf.Value
			qtChild := qt.NewChild("searching for series matching __graphite__=%q", query)
			paths, err := api.s.SearchGraphitePaths(qtChild, tr, query, maxMetrics, deadline)
			qtChild.Donef("found %d series", len(paths))
			if err != nil {
				return nil, fmt.Errorf("error when searching for Graphite paths for query %q: %w", query, err)
			}
			if len(paths) >= maxMetrics {
				return nil, fmt.Errorf("more than %d time series match Graphite query %q; "+
					"either narrow down the query or increase the corresponding -search.max* command-line flag value at vmselect nodes; "+
					"see https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#resource-usage-limits", maxMetrics, query)
			}
			tfs.AddGraphiteQuery(query, paths, tf.IsNegative)
		}
		tfss = append(tfss, tfs)
	}
	return tfss, nil
}
// GetMetadataRecords returns up to limit metadata rows for the given
// metricName.
func (api *vmstorageAPI) GetMetadataRecords(qt *querytracer.Tracer, tt *storage.TenantToken, limit int, metricName string, deadline uint64) ([]*metricsmetadata.Row, error) {
	// TODO(rtm0): Return empty result if tt do not match tenantID from flag.
	return api.s.GetMetadataRows(qt, limit, metricName), nil
}
// blockIterator implements vmselectapi.BlockIterator.
// Instances are recycled via blockIteratorsPool; see getBlockIterator and
// MustClose.
type blockIterator struct {
// sr is the underlying storage search the iterator drains.
sr storage.Search
// mb is a scratch MetricBlock; it is reset in MustClose before the
// iterator is returned to the pool.
mb storage.MetricBlock
}
// blockIteratorsPool recycles blockIterator instances to reduce allocations.
var blockIteratorsPool sync.Pool
// MustClose closes the underlying search and returns bi to the pool.
func (bi *blockIterator) MustClose() {
	bi.sr.MustClose()
	// Drop references held by the scratch block so the pooled iterator does
	// not retain data from the finished search.
	bi.mb.MetricName = nil
	bi.mb.Block.Reset()
	blockIteratorsPool.Put(bi)
}
// getBlockIterator returns a blockIterator from the pool, allocating a fresh
// one when the pool is empty.
func getBlockIterator() *blockIterator {
	if v := blockIteratorsPool.Get(); v != nil {
		return v.(*blockIterator)
	}
	return &blockIterator{}
}
// NextBlock advances to the next matching metric block, appends its marshaled
// representation to dst[:0] and returns the resulting byte slice. It returns
// ok=false when there are no more blocks; errors are reported via Error.
func (bi *blockIterator) NextBlock(dst []byte) ([]byte, bool) {
	if !bi.sr.NextMetricBlock() {
		return dst, false
	}
	// Read into bi.mb directly rather than into a by-value copy: the original
	// code did `mb := bi.mb`, which left the pooled bi.mb field unused (even
	// though MustClose resets it before pooling) and defeated the intended
	// reuse of the block's buffers across calls.
	bi.mb.MetricName = bi.sr.MetricBlockRef.MetricName
	bi.sr.MetricBlockRef.BlockRef.MustReadBlock(&bi.mb.Block)
	dst = bi.mb.Marshal(dst[:0])
	return dst, true
}
// Error returns the error encountered by the underlying search, if any.
func (bi *blockIterator) Error() error {
	return bi.sr.Error()
}
// checkTimeRange returns an error when querying the given tr is denied by the
// -denyQueriesOutsideRetention flag; it returns nil when the query is allowed.
// (The original comment claimed a boolean result, which was inaccurate.)
func checkTimeRange(s *storage.Storage, tr storage.TimeRange) error {
	if !*denyQueriesOutsideRetention {
		return nil
	}
	retentionMsecs := s.RetentionMsecs()
	// Oldest timestamp (milliseconds) that is still within the retention.
	minAllowedTimestamp := int64(fasttime.UnixTimestamp()*1000) - retentionMsecs
	if tr.MinTimestamp <= minAllowedTimestamp {
		return &httpserver.ErrorWithStatusCode{
			Err: fmt.Errorf("the given time range %s is outside the allowed retention %.3f days according to -denyQueriesOutsideRetention",
				&tr, float64(retentionMsecs)/(24*3600*1000)),
			StatusCode: http.StatusServiceUnavailable,
		}
	}
	return nil
}
// getMaxMetrics returns the effective per-query series limit given the limit
// requested by the search query. A non-positive request falls back to the
// global limit, and any request is capped by -search.maxUniqueTimeseries
// when that flag is set.
func getMaxMetrics(searchQueryLimit int) int {
	if searchQueryLimit <= 0 {
		return GetMaxUniqueTimeSeries()
	}
	if limit := *maxUniqueTimeseries; limit != 0 && searchQueryLimit > limit {
		// The requested limit cannot exceed `-search.maxUniqueTimeseries`.
		return limit
	}
	return searchQueryLimit
}
// GetMaxUniqueTimeSeries returns `-search.maxUniqueTimeseries` or the auto-calculated value based on available resources.
// The calculation is split into calculateMaxUniqueTimeSeriesForResource for unit testing.
// The result is computed once and cached for subsequent calls.
func GetMaxUniqueTimeSeries() int {
	maxUniqueTimeseriesValueOnce.Do(func() {
		if v := *maxUniqueTimeseries; v > 0 {
			maxUniqueTimeseriesValue = v
			return
		}
		// The flag is unset or non-positive - derive the limit from the
		// concurrency setting and the memory available to the process.
		maxUniqueTimeseriesValue = calculateMaxUniqueTimeSeriesForResource(*maxConcurrentRequests, memory.Remaining())
	})
	return maxUniqueTimeseriesValue
}
// calculateMaxUniqueTimeSeriesForResource calculates the max series limit for
// a single query based on the available resources: it divides the remaining
// memory among maxConcurrentRequests worst-case concurrent requests, assuming
// roughly 200 bytes per unique series.
func calculateMaxUniqueTimeSeriesForResource(maxConcurrentRequests, remainingMemory int) int {
	if maxConcurrentRequests <= 0 {
		// This line should NOT be reached unless the user has set an incorrect
		// `search.maxConcurrentRequests`. In such cases, fall back to an
		// effectively unlimited value.
		const unlimited = 2e9
		// Pass the limit as an int with %d: the original used %v with the
		// untyped constant 2e9, which logged the float form "2e+09".
		logger.Warnf("limiting -search.maxUniqueTimeseries to %d because -search.maxConcurrentRequests=%d.", int(unlimited), maxConcurrentRequests)
		return unlimited
	}
	// Calculate the max metrics limit for a single request in the worst-case
	// concurrent scenario. The approximate size of 1 unique series that could
	// occupy in the vmstorage is 200 bytes.
	mts := remainingMemory / 200 / maxConcurrentRequests
	logger.Infof("limiting -search.maxUniqueTimeseries to %d according to -search.maxConcurrentRequests=%d and remaining memory=%d bytes. To increase the limit, reduce -search.maxConcurrentRequests or increase memory available to the process.", mts, maxConcurrentRequests, remainingMemory)
	return mts
}