Compare commits

...

87 Commits

Author SHA1 Message Date
Aliaksandr Valialkin
88f8670ede lib/fs: add MustStopDirRemover for waiting until pending directories are removed on graceful shutdown
This patch is mainly required for laggy NFS. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/162
2019-09-05 11:13:17 +03:00
Aliaksandr Valialkin
9eb5de334f lib/storage: typo fix 2019-09-04 19:58:01 +03:00
Aliaksandr Valialkin
6954e126fc app/vmselect/promql: ignore grouping by destination label in count_values, since such a grouping is performed automatically 2019-09-04 19:58:01 +03:00
Aliaksandr Valialkin
bce35b8dd9 README.md: mention that Prometheus doesn't drop data when VictoriaMetrics restarts 2019-09-04 18:40:39 +03:00
Aliaksandr Valialkin
16dd145586 lib/storage: remove duplicate tag keys on MetricName.Marshal call
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/172
2019-09-04 18:13:45 +03:00
Aliaksandr Valialkin
cd2c9e39da deployment/docker: switch Go builder from Go 1.12.9 to Go 1.13.0 2019-09-04 17:17:23 +03:00
Aliaksandr Valialkin
305e7bc981 app/vmselect/promql: do not return artificial points beyond the last point in time series 2019-09-04 16:35:34 +03:00
Aliaksandr Valialkin
9721d06c6a app/vmselect/prometheus: do not adjust start and end args in /api/v1/query_range if nocache=1 arg is set
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/171
2019-09-04 13:10:09 +03:00
Aliaksandr Valialkin
4862e93024 lib/fs: try harder with directory removal on NFS in the event of temporary lock
Do not give up after 11 attempts of directory removal on laggy NFS.

Add `vm_nfs_dir_remove_failed_attempts_total` metric for counting the number of failed attempts
on directory removal.

Log failed attempts on directory removal after long sleep times.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/162
2019-09-04 12:24:50 +03:00
Aliaksandr Valialkin
db4560ca31 app/vmselect/promql: reset timeseries name on group_left and group_right as Prometheus does 2019-09-03 20:42:54 +03:00
Aliaksandr Valialkin
1575a560f0 app/vmselect/netstorage: adaptively adjust the maximum inmemory file size for storing temporary blocks
The maximum inmemory file size now depends on `-memory.allowedPercent`.
This should improve performance and reduce the number of filesystem calls
on machines with big amounts of RAM when performing heavy queries
over big number of samples and time series.
2019-09-03 13:32:09 +03:00
Aliaksandr Valialkin
e1d76ec1f3 lib/storage: invalidate tagFilters -> TSIDS cache when newly added index data becomes visible to search
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/163
2019-08-29 15:08:35 +03:00
Aliaksandr Valialkin
aeaa5de5fe lib/prompb: apply ba06b47c16
The following commands used:

gofmt -r '(uint64(x)&0x7F)<<shift -> uint64(x&0x7F)<<shift' -w ./lib/prompb/
gofmt -r '(int64(x)&0x7F)<<shift -> int64(x&0x7F)<<shift' -w ./lib/prompb/
2019-08-29 13:35:27 +03:00
Aliaksandr Valialkin
4c0a262a2e .github/workflows: verify builds on freebsd and darwin 2019-08-28 23:05:15 +03:00
Aliaksandr Valialkin
3685fc18d5 Makefile: extract app-local and app-local-pure build rules 2019-08-28 01:34:58 +03:00
Aliaksandr Valialkin
ede7ad3703 app/victoria-metrics: add missing victoria-metrics prefix to --version output when building with make victoria-metrics 2019-08-28 01:28:08 +03:00
Aliaksandr Valialkin
9196c085a7 all: port to FreeBSD on GOARCH=amd64 2019-08-28 01:19:23 +03:00
Aliaksandr Valialkin
3802ae9269 README.md: recommend checking which metrics will be deleted before deleting them 2019-08-27 15:01:16 +03:00
Artem Navoiev
b0090dbd86 add github actions (#160) 2019-08-27 14:42:46 +03:00
Aliaksandr Valialkin
603a79b357 app/vmstorage: increase default values for search.maxTagKeys, search.maxTagValues and search.maxUniqueTimeseries 2019-08-27 14:29:53 +03:00
Aliaksandr Valialkin
2655220c58 lib/storage: go fmt 2019-08-27 14:29:51 +03:00
Aliaksandr Valialkin
bf915fc0db lib/storage: report proper maxMetrics limit when more than -search.maxUniqueTimeseries series match the given filters 2019-08-27 14:21:42 +03:00
Aliaksandr Valialkin
2fc157ff7a lib/storage: properly handle (?i) in the tag filter regexp
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/161
2019-08-26 00:44:45 +03:00
Aliaksandr Valialkin
0dc0006f34 lib/storage: calculate the maximum number of rows per small part from -memory.allowedPercent
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/159

This simplifies error detection additionally to the `vm_rows_ignored_total` counters.
2019-08-25 15:31:47 +03:00
Aliaksandr Valialkin
4b688fffee lib/storage: calculate the maximum number of rows per small part from -memory.allowedPercent
This should improve query speed over recent data on machines with big amounts of RAM
2019-08-25 14:41:12 +03:00
Aliaksandr Valialkin
1402a6b981 lib/storage: properly limit the number of output rows in small and big parts storage
Previously small parts storage didn't take into account the available disk space for big parts.
2019-08-25 14:41:12 +03:00
Aliaksandr Valialkin
3308279c4e lib/storage: remove outdated comment on maxRowsPerSmallPart
The comment became outdated after the commit ed6ac1a5df027f0dfc22448e3b27c26b6f77c67a,
which stops merging of small parts on graceful shutdown instead of waiting
for their completion.
2019-08-25 13:47:32 +03:00
Aliaksandr Valialkin
fb909cf710 app/vminsert/influx: set db label only if Influx line doesn't have db tag 2019-08-24 13:52:48 +03:00
Aliaksandr Valialkin
c4e75f09dc README.md: mention that -retentionPeriod must cover the backfilled data 2019-08-24 13:52:48 +03:00
Aliaksandr Valialkin
fb8840ac38 vendor: update github.com/valyala/quicktemplate from v1.1.1 to v1.2.0 2019-08-24 13:41:15 +03:00
Aliaksandr Valialkin
9c9221d1b2 app/vminsert: skip empty tags 2019-08-24 13:36:29 +03:00
Aliaksandr Valialkin
70ca018a57 app/vminsert/opentsdbhttp: skip invalid rows and continue parsing the remaining rows
Invalid rows are logged and counted in `vm_rows_invalid_total{type="opentsdb-http"}` metric
2019-08-24 13:36:29 +03:00
Aliaksandr Valialkin
4266091e4f app/vminsert/opentsdb: skip invalid rows and continue parsing the remaining rows
Invalid rows are logged and counted in `vm_rows_invalid_total{type="opentsdb"}` metric
2019-08-24 13:36:29 +03:00
Aliaksandr Valialkin
8001d29b6e app/vminsert/graphite: skip invalid rows and continue parsing the remaining rows
Invalid rows are logged and counted in `vm_rows_invalid_total{type="graphite"}` metric
2019-08-24 13:36:29 +03:00
Aliaksandr Valialkin
9d3f1fcbb9 app/vminsert/influx: skip invalid rows and continue parsing the remaining rows
Invalid influx lines are logged and counted in `vm_rows_invalid_total{type="influx"}` metric.
2019-08-24 13:36:29 +03:00
Aliaksandr Valialkin
ba7b3806be app/vminsert/influx: do not allow escaping newline chars, since they don't occur in real life
The previous report with escaped newline chars in influx line protocol was a false alarm.
2019-08-23 18:42:05 +03:00
Aliaksandr Valialkin
7fa88c6efc app/vminsert/opentsdbhttp: allow timestamp as float64 and as string, since it occurs in real life 2019-08-23 18:35:41 +03:00
Aliaksandr Valialkin
4da34b11f8 app/vminsert/influx: handle \r\n aka crlf influx line endings from windows world
Such lines exist in real life.
2019-08-23 18:28:49 +03:00
Aliaksandr Valialkin
a18317adbc app/vminsert/influx: allow escaping newline char
Though newline char isn't mentioned in escape rules at https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/ ,
there are reports that such chars occur in real life
2019-08-23 15:14:46 +03:00
Aliaksandr Valialkin
44d7fc599d app/vminsert/influx: skip comments starting with # in influx line protocol 2019-08-23 14:43:09 +03:00
Aliaksandr Valialkin
dce6079379 README.md: add a section about Go profiling 2019-08-23 13:37:09 +03:00
Aliaksandr Valialkin
98419c00ef vendor: make vendor-update 2019-08-23 10:02:10 +03:00
Aliaksandr Valialkin
ac004665b5 all: return 503 http error if service is temporarily unavailable
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/156
2019-08-23 09:55:07 +03:00
Aliaksandr Valialkin
8c03a8c4b4 app/vminsert: allow setting the maximum number of labels per time series via -maxLabelsPerTimeseries 2019-08-23 08:45:26 +03:00
Aliaksandr Valialkin
8a126c2865 README.md: mention that VictoriaMetrics supports enterprise workloads 2019-08-22 18:00:47 +03:00
Aliaksandr Valialkin
380cae23a0 lib/storage: add benchmarks for regexp filter match / mismatch
These benchmarks allow estimating the performance of regexp filters in promql
2019-08-22 16:36:42 +03:00
Aliaksandr Valialkin
1272e407b2 app/vmselect/promql: attempt to repair invalid bucket counts passed to histogram_quantile
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/136
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/154
2019-08-22 14:39:46 +03:00
Aliaksandr Valialkin
5f33fc8e46 app/vminsert: add ability to ingest data via HTTP OpenTSDB /api/put requests
This is manual merge of the https://github.com/VictoriaMetrics/VictoriaMetrics/pull/152
Thanks to nustinov@gmail.com for the initial pull request.
2019-08-22 12:28:32 +03:00
Aliaksandr Valialkin
ec8125606d app/vminsert/opentsdb: fix BenchmarkRowsUnmarshal by adding missing put prefixes to each line 2019-08-21 19:14:47 +03:00
Aliaksandr Valialkin
f4a38f7fb1 app/vmselect/promql: fix panic on -search.disableCache
Reset the cache if it is disabled instead of stopping, since it is stopped on graceful shutdown.
2019-08-21 17:11:52 +03:00
Aliaksandr Valialkin
ab740afd0d app/vmselect/promql: explain why empty timeseries aren't removed in transformLabelValue 2019-08-21 11:29:24 +03:00
Aliaksandr Valialkin
7b5168adfb app/vmselect/promql: remove NaNs from /api/v1/query_range output like Prometheus does
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/153
2019-08-20 23:01:41 +03:00
Aliaksandr Valialkin
a0d480fbf3 app/vmselect/promql: pre-allocate memory for map for checking for duplicate timeseries
This should reduce memory allocations for big number of timeseries
2019-08-20 23:01:39 +03:00
Aliaksandr Valialkin
0dfc1ace53 README.md: add a section about backfilling 2019-08-20 00:34:51 +03:00
Aliaksandr Valialkin
d3fd113a80 app/vmselect/promql: add label_value(q, label_name) func, which returns numeric value labels with name label_name in q 2019-08-20 00:28:34 +03:00
Aliaksandr Valialkin
4f738c8a15 lib/storage: try slower path for searching the tag filter with the minimum number of matching time series before giving up with increase -search.maxUniqueTimeseries error 2019-08-19 16:04:21 +03:00
Aliaksandr Valialkin
dd86e6130c app/vmselect/promql: independently track offset hints for tStart and tEnd
This should improve performance if timeseries starts or ends on the selected time range
2019-08-19 13:40:14 +03:00
Aliaksandr Valialkin
6a27657d73 app/vmselect/promql: optimize search for timestamp boundaries in rollupConfig.Do
This should improve the performance of queries over big number of time series
with big number of output points.
2019-08-19 13:03:29 +03:00
Aliaksandr Valialkin
c23b66a1ad lib/storage: pre-allocate memory for blockHeader slice in unmarshalBlockHeaders
This reduces memory usage and memory fragmentation when working with big number of time series
2019-08-19 12:46:33 +03:00
Aliaksandr Valialkin
be39414f9c deployment/docker: switch Go builder from go1.12.8 to go1.12.9 2019-08-18 22:07:58 +03:00
Aliaksandr Valialkin
e74fb23189 app/vmselect/promql: add scrape_interval(q[d]) function, which would return scrape interval for q over d 2019-08-18 21:08:26 +03:00
Aliaksandr Valialkin
582fdc059a app/vmselect/promql: handle comparisons with NaN similar to Prometheus
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/150
2019-08-18 00:25:50 +03:00
Aliaksandr Valialkin
1c108fc494 app/vmselect/promql: add lifetime(q[d]) function, which returns the lifetime of q over d in seconds.
This function is useful for determining time series lifetime.
`d` must exceed the expected lifetime of the time series, otherwise
the function would return values close to `d`.
2019-08-16 11:59:32 +03:00
Aliaksandr Valialkin
d6b5ed6d39 app/vmselect/promql: fix corner-case calculation for ideriv 2019-08-16 11:59:28 +03:00
Aliaksandr Valialkin
639b14e8ab app/vmselect/promql: properly handle corner cases for rollup functions 2019-08-15 23:29:59 +03:00
Aliaksandr Valialkin
483de1cc06 lib/workingsetcache: automatically detect when it is better to double cache capacity 2019-08-15 22:57:55 +03:00
Aliaksandr Valialkin
9e0896055d deployment/docker: switch Go builder from go1.12.7 to go1.12.8 2019-08-15 20:43:36 +03:00
Aliaksandr Valialkin
5bb61b8b38 vendor: update github.com/valyala/gozstd from v1.5.1 to v1.6.0 2019-08-15 12:56:42 +03:00
Aliaksandr Valialkin
75a58dee02 README.md: typo fix 2019-08-14 03:28:07 +03:00
Aliaksandr Valialkin
5b41122292 lib/storage: properly cache tagFilters -> TSIDs entries from historical index 2019-08-14 02:29:58 +03:00
Aliaksandr Valialkin
964c296f96 lib/storage: compress contents of cache for tagFilters -> TSIDs
This should increase cache capacity
2019-08-14 02:29:52 +03:00
Aliaksandr Valialkin
9ecb994671 app/vmselect/promql: store compressed results in the cache
This should increase rollup results cache capacity.
2019-08-14 02:29:45 +03:00
Aliaksandr Valialkin
9d41e0dcae README.md: reduce the recommended max_shards value according to test results
See https://github.com/prometheus/prometheus/issues/5803#issuecomment-520973662
2019-08-13 22:33:10 +03:00
Aliaksandr Valialkin
09fc6e22e5 all: use workingsetcache instead of fastcache
This should reduce the amount of RAM required for processing time series
with non-zero churn rate.

The previous cache behavior can be restored with `-cache.oldBehavior` command-line flag.
2019-08-13 21:39:34 +03:00
Aliaksandr Valialkin
99c37c2c96 lib/fs: add test for IsTemporaryFileName 2019-08-13 21:33:45 +03:00
Aliaksandr Valialkin
06c2c25544 Makefile: consistency renaming: check_all -> check-all 2019-08-13 21:31:19 +03:00
Aliaksandr Valialkin
ec1b185991 lib/storage: remove broken BenchmarkIndexDBSearchTSIDs 2019-08-13 20:22:08 +03:00
Aliaksandr Valialkin
0967683ae9 lib: move common code for creating flock.lock file into fs.CreateFlockFile 2019-08-13 01:45:46 +03:00
Aliaksandr Valialkin
ad8a43b4e1 README.md: fix metric names in influx line protocol example
Default separator between `measurement` and `field_name` is `_`.
2019-08-12 15:58:34 +03:00
Aliaksandr Valialkin
7346982763 README.md: mention that Influx line protocol accepts timestamps in nanoseconds by default 2019-08-12 15:31:52 +03:00
Aliaksandr Valialkin
5d8d110010 lib/fs: atomically create file with the given contents on WriteFileAtomically
This should prevent corruption of `transaction` and `metadata.json` files
on unclean shutdown such as OOM, `kill -9`, power loss, etc.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/148
2019-08-12 15:02:55 +03:00
Aliaksandr Valialkin
0b488f1e37 lib/storage: do not change timestamps to constant rate if values are constant or have constant delta
This breaks the original timestamps, which results in issues like
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/120 and
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/141 .
2019-08-06 15:40:07 +03:00
Aliaksandr Valialkin
b8bb74ffc6 app/vmstorage: add vm_concurrent_addrows_* metrics for tracking concurrency for Storage.AddRows calls
Track also the number of dropped rows due to the exceeded timeout
on concurrency limit for Storage.AddRows. This number is tracked in `vm_concurrent_addrows_dropped_rows_total`
2019-08-06 15:08:33 +03:00
Aliaksandr Valialkin
5c9e48417a vendor: update github.com/VictoriaMetrics/metrics to v1.7.1 2019-08-05 19:21:36 +03:00
Aliaksandr Valialkin
5c83f8e203 app: add vm_concurrent_ metrics for visibility in concurrency limiters for vminsert and vmselect 2019-08-05 18:30:57 +03:00
Aliaksandr Valialkin
05713469c3 vendor: make vendor-update 2019-08-05 10:33:21 +03:00
Aliaksandr Valialkin
8822079b77 lib/storage: properly reset partSearch.fetchData in partSearch.reset 2019-08-05 09:56:06 +03:00
160 changed files with 5840 additions and 1345 deletions

42
.github/workflows/main.yml vendored Normal file
View File

@@ -0,0 +1,42 @@
name: main
on:
- push
jobs:
build:
name: Build
runs-on: ubuntu-latest
steps:
- name: Setup Go
uses: actions/setup-go@v1
with:
go-version: 1.12
id: go
- name: Code checkout
uses: actions/checkout@v1
- name: Dependencies
env:
GO111MODULE: off
run: |
go get -v golang.org/x/lint/golint
go get -u github.com/kisielk/errcheck
- name: Build
env:
GO111MODULE: on
run: |
export PATH=$PATH:$(go env GOPATH)/bin # temporary fix. See https://github.com/actions/setup-go/issues/14
make check-all
git diff --exit-code
make test-full
make test-pure
make victoria-metrics
make victoria-metrics-pure
make victoria-metrics-arm
make victoria-metrics-arm64
GOOS=freebsd go build -mod=vendor ./app/victoria-metrics
GOOS=darwin go build -mod=vendor ./app/victoria-metrics
- name: Publish coverage
uses: codecov/codecov-action@v1.0.0
with:
token: ${{secrets.CODECOV_TOKEN}}
file: ./coverage.txt

View File

@@ -1,26 +0,0 @@
language: go
go:
- 1.12.x
install: make
env:
- GO111MODULE=on
before_install:
- GO111MODULE=off go get -v golang.org/x/lint/golint
- GO111MODULE=off go get -u github.com/kisielk/errcheck
script:
- make check_all
- git diff --exit-code
- make test-full
- make test-pure
- make victoria-metrics
- make victoria-metrics-pure
- make victoria-metrics-arm
- make victoria-metrics-arm64
after_success:
- bash <(curl -s https://codecov.io/bash)

View File

@@ -1,7 +1,7 @@
PKG_PREFIX := github.com/VictoriaMetrics/VictoriaMetrics
BUILDINFO_TAG ?= $(shell echo $$(git describe --long --all | tr '/' '-')$$( \
git diff-index --quiet HEAD -- || echo '-dirty-'$$(git diff-index -u HEAD | sha1sum | grep -oP '^.{8}')))
git diff-index --quiet HEAD -- || echo '-dirty-'$$(git diff-index -u HEAD | openssl sha1 | cut -c 10-17)))
PKG_TAG ?= $(shell git tag -l --points-at HEAD)
ifeq ($(PKG_TAG),)
@@ -50,7 +50,7 @@ errcheck: install-errcheck
install-errcheck:
which errcheck || GO111MODULE=off go get -u github.com/kisielk/errcheck
check_all: fmt vet lint errcheck golangci-lint
check-all: fmt vet lint errcheck golangci-lint
test:
GO111MODULE=on go test -tags=integration -mod=vendor ./lib/... ./app/...
@@ -75,6 +75,12 @@ vendor-update:
GO111MODULE=on go mod tidy
GO111MODULE=on go mod vendor
app-local:
CGO_ENABLED=1 GO111MODULE=on go build $(RACE) -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
app-local-pure:
CGO_ENABLED=0 GO111MODULE=on go build $(RACE) -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)-pure$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
quicktemplate-gen: install-qtc
qtc

117
README.md
View File

@@ -2,7 +2,7 @@
[![Slack](https://img.shields.io/badge/join%20slack-%23victoriametrics-brightgreen.svg)](http://slack.victoriametrics.com/)
[![GitHub license](https://img.shields.io/github/license/VictoriaMetrics/VictoriaMetrics.svg)](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE)
[![Go Report](https://goreportcard.com/badge/github.com/VictoriaMetrics/VictoriaMetrics)](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics)
[![Build Status](https://travis-ci.org/VictoriaMetrics/VictoriaMetrics.svg?branch=master)](https://travis-ci.org/VictoriaMetrics/VictoriaMetrics)
[![Build Status](https://github.com/VictoriaMetrics/VictoriaMetrics/workflows/main/badge.svg)](https://github.com/VictoriaMetrics/VictoriaMetrics/actions)
[![codecov](https://codecov.io/gh/VictoriaMetrics/VictoriaMetrics/branch/master/graph/badge.svg)](https://codecov.io/gh/VictoriaMetrics/VictoriaMetrics)
<img alt="Victoria Metrics" src="logo.png">
@@ -21,7 +21,7 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
* Supports [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/), so it can be used as Prometheus drop-in replacement in Grafana.
Additionally, VictoriaMetrics extends PromQL with opt-in [useful features](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/ExtendedPromQL).
* Global query view. Multiple Prometheus instances may write data into VictoriaMetrics. Later this data may be used in a single query.
* Supports global query view. Multiple Prometheus instances may write data into VictoriaMetrics. Later this data may be used in a single query.
* High performance and good scalability for both [inserts](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b)
and [selects](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4).
[Outperforms InfluxDB and TimescaleDB by up to 20x](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
@@ -38,13 +38,14 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
* All the data is stored in a single directory pointed by `-storageDataPath` flag.
* Easy backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
* Storage is protected from corruption on unclean shutdown (i.e. hardware reset or `kill -9`) thanks to [the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
* Supports metrics' ingestion and backfilling via the following protocols:
* Supports metrics' ingestion and [backfilling](#backfilling) via the following protocols:
* [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
* [InfluxDB line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)
* [Graphite plaintext protocol](https://graphite.readthedocs.io/en/latest/feeding-carbon.html) with [tags](https://graphite.readthedocs.io/en/latest/tags.html#carbon)
if `-graphiteListenAddr` is set.
* [OpenTSDB put message](http://opentsdb.net/docs/build/html/api_telnet/put.html) if `-opentsdbListenAddr` is set.
* Ideally works with big amounts of time series data from Kubernetes, IoT sensors, connected cars and industrial telemetry.
* [HTTP OpenTSDB /api/put requests](http://opentsdb.net/docs/build/html/api_http/put.html) if `-opentsdbHTTPListenAddr` is set.
* Ideally works with big amounts of time series data from Kubernetes, IoT sensors, connected cars, industrial telemetry and various Enterprise workloads.
* Has open source [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
@@ -86,6 +87,8 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
- [Tuning](#tuning)
- [Monitoring](#monitoring)
- [Troubleshooting](#troubleshooting)
- [Backfilling](#backfilling)
- [Profiling](#profiling)
- [Roadmap](#roadmap)
- [Contacts](#contacts)
- [Community and contributions](#community-and-contributions)
@@ -108,7 +111,8 @@ The following command-line flags are used the most:
* `-retentionPeriod` - retention period in months for the data. Older data is automatically deleted.
* `-httpListenAddr` - TCP address to listen to for http requests. By default, it listens port `8428` on all the network interfaces.
* `-graphiteListenAddr` - TCP and UDP address to listen to for Graphite data. By default, it is disabled.
* `-opentsdbListenAddr` - TCP and UDP address to listen to for OpenTSDB data. By default, it is disabled.
* `-opentsdbListenAddr` - TCP and UDP address to listen to for OpenTSDB data over telnet protocol. By default, it is disabled.
* `-opentsdbHTTPListenAddr` - TCP address to listen to for HTTP OpenTSDB data over `/api/put`. By default, it is disabled.
Pass `-help` to see all the available flags with description and default values.
@@ -124,7 +128,7 @@ remote_write:
- url: http://<victoriametrics-addr>:8428/api/v1/write
queue_config:
max_samples_per_send: 10000
max_shards: 100
max_shards: 30
```
Substitute `<victoriametrics-addr>` with the hostname or IP address of VictoriaMetrics.
@@ -182,6 +186,9 @@ Follow the following steps during the upgrade:
2) Wait until the process stops. This can take a few seconds.
3) Start the upgraded VictoriaMetrics.
Prometheus doesn't drop data during VictoriaMetrics restart.
See [this article](https://grafana.com/blog/2019/03/25/whats-new-in-prometheus-2.8-wal-based-remote-write/) for details.
### How to apply new config to VictoriaMetrics?
@@ -191,6 +198,9 @@ VictoriaMetrics must be restarted for applying new config:
2) Wait until the process stops. This can take a few seconds.
3) Start VictoriaMetrics with the new config.
Prometheus doesn't drop data during VictoriaMetrics restart.
See [this article](https://grafana.com/blog/2019/03/25/whats-new-in-prometheus-2.8-wal-based-remote-write/) for details.
### How to send data from InfluxDB-compatible agents such as [Telegraf](https://www.influxdata.com/time-series-platform/telegraf/)?
@@ -205,7 +215,8 @@ For instance, put the following lines into `Telegraf` config, so it sends data t
Do not forget substituting `<victoriametrics-addr>` with the real address where VictoriaMetrics runs.
VictoriaMetrics maps Influx data using the following rules:
* [`db` query arg](https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint) is mapped into `db` label value.
* [`db` query arg](https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint) is mapped into `db` label value
unless `db` tag exists in the Influx line.
* Field names are mapped to time series names prefixed with `{measurement}{separator}` value,
where `{separator}` equals to `_` by default. It can be changed with `-influxMeasurementFieldSeparator` command-line flag.
See also `-influxSkipSingleField` command-line flag.
@@ -221,8 +232,8 @@ foo,tag1=value1,tag2=value2 field1=12,field2=40
is converted into the following Prometheus data points:
```
foo.field1{tag1="value1", tag2="value2"} 12
foo.field2{tag1="value1", tag2="value2"} 40
foo_field1{tag1="value1", tag2="value2"} 12
foo_field2{tag1="value1", tag2="value2"} 40
```
Example for writing data with [Influx line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)
@@ -236,7 +247,7 @@ An arbitrary number of lines delimited by '\n' may be sent in a single request.
After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:
```
curl -G 'http://localhost:8428/api/v1/export' --data-urlencode 'match={__name__!=""}'
curl -G 'http://localhost:8428/api/v1/export' -d 'match={__name__!=""}'
```
The `/api/v1/export` endpoint should return the following response:
@@ -246,6 +257,9 @@ The `/api/v1/export` endpoint should return the following response:
{"metric":{"__name__":"measurement.field2","tag1":"value1","tag2":"value2"},"values":[1.23],"timestamps":[1560272508147]}
```
Note that Influx line protocol expects [timestamps in *nanoseconds* by default](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/#timestamp),
while VictoriaMetrics stores them with *milliseconds* precision.
### How to send data from Graphite-compatible agents such as [StatsD](https://github.com/etsy/statsd)?
@@ -271,7 +285,7 @@ An arbitrary number of lines delimited by `\n` may be sent in one go.
After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:
```
curl -G 'http://localhost:8428/api/v1/export' --data-urlencode 'match={__name__!=""}'
curl -G 'http://localhost:8428/api/v1/export' -d 'match={__name__!=""}'
```
The `/api/v1/export` endpoint should return the following response:
@@ -291,8 +305,13 @@ or via [go-graphite/carbonapi](https://github.com/go-graphite/carbonapi/blob/mas
### How to send data from OpenTSDB-compatible agents?
VictoriaMetrics supports [telnet put protocol](http://opentsdb.net/docs/build/html/api_telnet/put.html)
and [HTTP /api/put requests](http://opentsdb.net/docs/build/html/api_http/put.html) for ingesting OpenTSDB data.
#### Sending data via `telnet put` protocol
1) Enable OpenTSDB receiver in VictoriaMetrics by setting `-opentsdbListenAddr` command line flag. For instance,
the following command will enable OpenTSDB receiver in VictoriaMetrics on TCP and UDP port `4242`:
the following command enables OpenTSDB receiver in VictoriaMetrics on TCP and UDP port `4242`:
```
/path/to/victoria-metrics-prod -opentsdbListenAddr=:4242
@@ -311,7 +330,7 @@ An arbitrary number of lines delimited by `\n` may be sent in one go.
After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:
```
curl -G 'http://localhost:8428/api/v1/export' --data-urlencode 'match={__name__!=""}'
curl -G 'http://localhost:8428/api/v1/export' -d 'match={__name__!=""}'
```
The `/api/v1/export` endpoint should return the following response:
@@ -321,6 +340,44 @@ The `/api/v1/export` endpoint should return the following response:
```
#### Sending OpenTSDB data via HTTP `/api/put` requests
1) Enable HTTP server for OpenTSDB `/api/put` requests by setting `-opentsdbHTTPListenAddr` command line flag. For instance,
the following command enables OpenTSDB HTTP server on port `4242`:
```
/path/to/victoria-metrics-prod -opentsdbHTTPListenAddr=:4242
```
2) Send data to the given address from OpenTSDB-compatible agents.
Example for writing a single data point:
```
curl -H 'Content-Type: application/json' -d '{"metric":"x.y.z","value":45.34,"tags":{"t1":"v1","t2":"v2"}}' http://localhost:4242/api/put
```
Example for writing multiple data points in a single request:
```
curl -H 'Content-Type: application/json' -d '[{"metric":"foo","value":45.34},{"metric":"bar","value":43}]' http://localhost:4242/api/put
```
After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:
```
curl -G 'http://localhost:8428/api/v1/export' -d 'match[]=x.y.z' -d 'match[]=foo' -d 'match[]=bar'
```
The `/api/v1/export` endpoint should return the following response:
```
{"metric":{"__name__":"foo"},"values":[45.34],"timestamps":[1566464846000]}
{"metric":{"__name__":"bar"},"values":[43],"timestamps":[1566464846000]}
{"metric":{"__name__":"x.y.z","t1":"v1","t2":"v2"},"values":[45.34],"timestamps":[1566464763000]}
```
### How to build from sources
We recommend using either [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) or
@@ -427,6 +484,9 @@ where `<timeseries_selector_for_delete>` may contain any [time series selector](
for metrics to delete. After that all the time series matching the given selector are deleted. Storage space for
the deleted time series isn't freed instantly - it is freed during subsequent merges of data files.
It is recommended verifying which metrics will be deleted with the call to `http://<victoria-metrics-addr>:8428/api/v1/series?match[]=<timeseries_selector_for_delete>`
before actually deleting the metrics.
### How to export time series?
@@ -524,7 +584,7 @@ kill -HUP `pidof prometheus`
If you have Prometheus HA pairs with replicas `r1` and `r2` in each pair, then configure each `r1`
to write data to `<victoriametrics-addr-1`, while each `r2` should write data to `victoriametrics-addr-2`.
to write data to `victoriametrics-addr-1`, while each `r2` should write data to `victoriametrics-addr-2`.
### Multiple retentions
@@ -634,6 +694,35 @@ The most interesting metrics are:
of data loss stored in the broken parts. In the future, `vmrecover` tool will be created
for automatic recovering from such errors.
### Backfilling
Make sure that configured `-retentionPeriod` covers timestamps for the backfilled data.
It is recommended to disable the query cache with the `-search.disableCache` command-line flag when writing
historical data with timestamps from the past, since the cache assumes that the data is written with
the current timestamps. The query cache can be re-enabled after the backfilling is complete.
### Profiling
VictoriaMetrics provides handlers for collecting the following [Go profiles](https://blog.golang.org/profiling-go-programs):
- Memory profile. It can be collected with the following command:
```
curl -s http://<victoria-metrics-host>:8428/debug/pprof/heap > mem.pprof
```
- CPU profile. It can be collected with the following command:
```
curl -s http://<victoria-metrics-host>:8428/debug/pprof/profile > cpu.pprof
```
The command for collecting CPU profile waits for 30 seconds before returning.
The collected profiles may be analyzed with [go tool pprof](https://github.com/google/pprof).
## Roadmap
- [ ] Replication [#118](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/118)

View File

@@ -1,7 +1,7 @@
# All these commands must run from repository root.
victoria-metrics:
GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/victoria-metrics ./app/victoria-metrics
APP_NAME=victoria-metrics $(MAKE) app-local
victoria-metrics-prod:
APP_NAME=victoria-metrics $(MAKE) app-via-docker
@@ -33,7 +33,7 @@ victoria-metrics-arm64-prod:
APP_NAME=victoria-metrics APP_SUFFIX='-arm64' DOCKER_OPTS='--env CGO_ENABLED=0 --env GOARCH=arm64' $(MAKE) app-via-docker
victoria-metrics-pure:
GO111MODULE=on CGO_ENABLED=0 go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/victoria-metrics-pure ./app/victoria-metrics
APP_NAME=victoria-metrics $(MAKE) app-local-pure
victoria-metrics-pure-prod:
APP_NAME=victoria-metrics APP_SUFFIX='-pure' DOCKER_OPTS='--env CGO_ENABLED=0' $(MAKE) app-via-docker

View File

@@ -9,6 +9,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
@@ -43,6 +44,8 @@ func main() {
vmstorage.Stop()
vmselect.Stop()
fs.MustStopDirRemover()
logger.Infof("the VictoriaMetrics has been stopped in %s", time.Since(startTime))
}

View File

@@ -21,6 +21,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
@@ -92,7 +93,7 @@ func setUp() {
func processFlags() {
flag.Parse()
for _, fs := range []struct {
for _, fv := range []struct {
flag string
value string
}{
@@ -103,8 +104,8 @@ func processFlags() {
{flag: "loggerLevel", value: testLogLevel},
} {
// panics if flag doesn't exist
if err := flag.Lookup(fs.flag).Value.Set(fs.value); err != nil {
log.Fatalf("unable to set %q with value %q, err: %v", fs.flag, fs.value, err)
if err := flag.Lookup(fv.flag).Value.Set(fv.value); err != nil {
log.Fatalf("unable to set %q with value %q, err: %v", fv.flag, fv.value, err)
}
}
}
@@ -121,13 +122,14 @@ func waitFor(timeout time.Duration, f func() bool) error {
}
func tearDown() {
vminsert.Stop()
vmstorage.Stop()
vmselect.Stop()
if err := httpserver.Stop(*httpListenAddr); err != nil {
log.Fatalf("cannot stop the webservice: %s", err)
}
os.RemoveAll(storagePath)
vminsert.Stop()
vmstorage.Stop()
vmselect.Stop()
fs.MustRemoveAll(storagePath)
fs.MustStopDirRemover()
}
func TestWriteRead(t *testing.T) {

View File

@@ -0,0 +1,30 @@
package common
import (
"compress/gzip"
"io"
"sync"
)
// GetGzipReader returns a gzip reader for r, taking it from the pool when possible.
//
// Hand the reader back via PutGzipReader once it is no longer needed.
func GetGzipReader(r io.Reader) (*gzip.Reader, error) {
	pooled, ok := gzipReaderPool.Get().(*gzip.Reader)
	if !ok {
		// Nothing is pooled at the moment, so a brand new reader is created.
		return gzip.NewReader(r)
	}
	// Re-point the pooled reader at r; a reader that fails to reset is not returned.
	if resetErr := pooled.Reset(r); resetErr != nil {
		return nil, resetErr
	}
	return pooled, nil
}

// PutGzipReader closes zr and places it into the pool for reuse by GetGzipReader.
func PutGzipReader(zr *gzip.Reader) {
	// The error from Close is deliberately ignored.
	_ = zr.Close()
	gzipReaderPool.Put(zr)
}

// gzipReaderPool holds *gzip.Reader values shared between GetGzipReader and PutGzipReader.
var gzipReaderPool sync.Pool

View File

@@ -2,9 +2,11 @@ package common
import (
"fmt"
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
)
@@ -99,7 +101,10 @@ func (ctx *InsertCtx) AddLabel(name, value string) {
// FlushBufs flushes buffered rows to the underlying storage.
func (ctx *InsertCtx) FlushBufs() error {
if err := vmstorage.AddRows(ctx.mrs); err != nil {
return fmt.Errorf("cannot store metrics: %s", err)
return &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("cannot store metrics: %s", err),
StatusCode: http.StatusServiceUnavailable,
}
}
return nil
}

View File

@@ -3,9 +3,11 @@ package concurrencylimiter
import (
"flag"
"fmt"
"net/http"
"runtime"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
"github.com/VictoriaMetrics/metrics"
)
@@ -32,6 +34,17 @@ func Init() {
func Do(f func() error) error {
// Limit the number of concurrent f calls in order to prevent excess
// memory usage and CPU thrashing.
select {
case ch <- struct{}{}:
err := f()
<-ch
return err
default:
}
// All the workers are busy.
// Sleep for up to waitDuration.
concurrencyLimitReached.Inc()
t := timerpool.Get(waitDuration)
select {
case ch <- struct{}{}:
@@ -41,9 +54,22 @@ func Do(f func() error) error {
return err
case <-t.C:
timerpool.Put(t)
concurrencyLimitErrors.Inc()
return fmt.Errorf("the server is overloaded with %d concurrent inserts; either increase -maxConcurrentInserts or reduce the load", cap(ch))
concurrencyLimitTimeout.Inc()
return &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("the server is overloaded with %d concurrent inserts; either increase -maxConcurrentInserts or reduce the load", cap(ch)),
StatusCode: http.StatusServiceUnavailable,
}
}
}
var concurrencyLimitErrors = metrics.NewCounter(`vm_concurrency_limit_errors_total`)
var (
concurrencyLimitReached = metrics.NewCounter(`vm_concurrent_insert_limit_reached_total`)
concurrencyLimitTimeout = metrics.NewCounter(`vm_concurrent_insert_limit_timeout_total`)
_ = metrics.NewGauge(`vm_concurrent_insert_capacity`, func() float64 {
return float64(cap(ch))
})
_ = metrics.NewGauge(`vm_concurrent_insert_current`, func() float64 {
return float64(len(ch))
})
)

View File

@@ -4,6 +4,8 @@ import (
"fmt"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/fastjson/fastfloat"
)
@@ -34,13 +36,8 @@ func (rs *Rows) Reset() {
// See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
//
// s must be unchanged until rs is in use.
func (rs *Rows) Unmarshal(s string) error {
var err error
rs.Rows, rs.tagsPool, err = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0])
if err != nil {
return err
}
return err
func (rs *Rows) Unmarshal(s string) {
rs.Rows, rs.tagsPool = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0])
}
// Row is a single graphite row.
@@ -83,6 +80,9 @@ func (r *Row) unmarshal(s string, tagsPool []Tag) ([]Tag, error) {
tags := tagsPool[tagsStart:]
r.Tags = tags[:len(tags):len(tags)]
}
if len(r.Metric) == 0 {
return tagsPool, fmt.Errorf("metric cannot be empty")
}
n = strings.IndexByte(tail, ' ')
if n < 0 {
@@ -95,41 +95,46 @@ func (r *Row) unmarshal(s string, tagsPool []Tag) ([]Tag, error) {
return tagsPool, nil
}
func unmarshalRows(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag, error) {
func unmarshalRows(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag) {
for len(s) > 0 {
n := strings.IndexByte(s, '\n')
if n == 0 {
// Skip empty line
s = s[1:]
continue
}
if cap(dst) > len(dst) {
dst = dst[:len(dst)+1]
} else {
dst = append(dst, Row{})
}
r := &dst[len(dst)-1]
if n < 0 {
// The last line.
var err error
tagsPool, err = r.unmarshal(s, tagsPool)
if err != nil {
err = fmt.Errorf("cannot unmarshal Graphite line %q: %s", s, err)
return dst, tagsPool, err
}
return dst, tagsPool, nil
}
var err error
tagsPool, err = r.unmarshal(s[:n], tagsPool)
if err != nil {
err = fmt.Errorf("cannot unmarshal Graphite line %q: %s", s[:n], err)
return dst, tagsPool, err
return unmarshalRow(dst, s, tagsPool)
}
dst, tagsPool = unmarshalRow(dst, s[:n], tagsPool)
s = s[n+1:]
}
return dst, tagsPool, nil
return dst, tagsPool
}
// unmarshalRow parses a single Graphite line s and appends the resulting row to dst.
//
// One trailing '\r' is stripped from s and empty lines are skipped.
// A line that cannot be parsed is logged, counted in invalidLines and left out of dst.
// It returns the updated dst and tagsPool.
func unmarshalRow(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag) {
	if n := len(s); n > 0 && s[n-1] == '\r' {
		s = s[:n-1]
	}
	if s == "" {
		// Nothing to parse on an empty line.
		return dst, tagsPool
	}
	// Extend dst by one slot, reusing spare capacity when it is available.
	if len(dst) < cap(dst) {
		dst = dst[:len(dst)+1]
	} else {
		dst = append(dst, Row{})
	}
	last := len(dst) - 1
	pool, err := dst[last].unmarshal(s, tagsPool)
	if err != nil {
		// Drop the slot reserved for the invalid line.
		dst = dst[:last]
		logger.Errorf("cannot unmarshal Graphite line %q: %s", s, err)
		invalidLines.Inc()
	}
	return dst, pool
}
var invalidLines = metrics.NewCounter(`vm_rows_invalid_total{type="graphite"}`)
func unmarshalTags(dst []Tag, s string) ([]Tag, error) {
for {
if cap(dst) > len(dst) {
@@ -145,12 +150,20 @@ func unmarshalTags(dst []Tag, s string) ([]Tag, error) {
if err := tag.unmarshal(s); err != nil {
return dst[:len(dst)-1], err
}
if len(tag.Key) == 0 || len(tag.Value) == 0 {
// Skip empty tag
dst = dst[:len(dst)-1]
}
return dst, nil
}
if err := tag.unmarshal(s[:n]); err != nil {
return dst[:len(dst)-1], err
}
s = s[n+1:]
if len(tag.Key) == 0 || len(tag.Value) == 0 {
// Skip empty tag
dst = dst[:len(dst)-1]
}
}
}
@@ -172,9 +185,6 @@ func (t *Tag) unmarshal(s string) error {
return fmt.Errorf("missing tag value for %q", s)
}
t.Key = s[:n]
if len(t.Key) == 0 {
return fmt.Errorf("tag key cannot be empty for %q", s)
}
t.Value = s[n+1:]
return nil
}

View File

@@ -9,45 +9,42 @@ func TestRowsUnmarshalFailure(t *testing.T) {
f := func(s string) {
t.Helper()
var rows Rows
if err := rows.Unmarshal(s); err == nil {
t.Fatalf("expecting non-nil error when parsing %q", s)
rows.Unmarshal(s)
if len(rows.Rows) != 0 {
t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
}
// Try again
if err := rows.Unmarshal(s); err == nil {
t.Fatalf("expecting non-nil error when parsing %q", s)
rows.Unmarshal(s)
if len(rows.Rows) != 0 {
t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
}
}
// Missing metric
f(" 123 455")
// Missing value
f("aaa")
// Invalid multiline
f("aaa\nbbb 123 34")
// missing tag
f("aa; 12 34")
// missing tag value
f("aa;bb 23 34")
f("aa;=dsd 234 45")
}
func TestRowsUnmarshalSuccess(t *testing.T) {
f := func(s string, rowsExpected *Rows) {
t.Helper()
var rows Rows
if err := rows.Unmarshal(s); err != nil {
t.Fatalf("cannot unmarshal %q: %s", s, err)
}
rows.Unmarshal(s)
if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
}
// Try unmarshaling again
if err := rows.Unmarshal(s); err != nil {
t.Fatalf("cannot unmarshal %q: %s", s, err)
}
rows.Unmarshal(s)
if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
}
@@ -60,7 +57,9 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
// Empty line
f("", &Rows{})
f("\r", &Rows{})
f("\n\n", &Rows{})
f("\n\r\n", &Rows{})
// Single line
f("foobar -123.456 789", &Rows{
@@ -98,7 +97,8 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
Timestamp: 2,
}},
})
f("foo;bar=baz;aa=;x=y 1 2", &Rows{
// Empty tags
f("foo;bar=baz;aa=;x=y;=z 1 2", &Rows{
Rows: []Row{{
Metric: "foo",
Tags: []Tag{
@@ -106,10 +106,6 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
Key: "bar",
Value: "baz",
},
{
Key: "aa",
Value: "",
},
{
Key: "x",
Value: "y",
@@ -139,4 +135,20 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
},
},
})
// Multi lines with invalid line
f("foo 0.3 2\naaa\nbar.baz 0.34 43\n", &Rows{
Rows: []Row{
{
Metric: "foo",
Value: 0.3,
Timestamp: 2,
},
{
Metric: "bar.baz",
Value: 0.34,
Timestamp: 43,
},
},
})
}

View File

@@ -16,8 +16,9 @@ cpu.usage_irq 0.34432 1234556768
b.RunParallel(func(pb *testing.PB) {
var rows Rows
for pb.Next() {
if err := rows.Unmarshal(s); err != nil {
panic(fmt.Errorf("cannot unmarshal %q: %s", s, err))
rows.Unmarshal(s)
if len(rows.Rows) != 4 {
panic(fmt.Errorf("unexpected number of rows unmarshaled: got %d; want 4", len(rows.Rows)))
}
}
})

View File

@@ -85,11 +85,7 @@ func (ctx *pushCtx) Read(r io.Reader) bool {
return false
}
}
if err := ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf)); err != nil {
graphiteUnmarshalErrors.Inc()
ctx.err = fmt.Errorf("cannot unmarshal graphite plaintext protocol data with size %d: %s", len(ctx.reqBuf), err)
return false
}
ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf))
// Fill missing timestamps with the current timestamp rounded to seconds.
currentTimestamp := time.Now().Unix()
@@ -136,9 +132,8 @@ func (ctx *pushCtx) reset() {
}
var (
graphiteReadCalls = metrics.NewCounter(`vm_read_calls_total{name="graphite"}`)
graphiteReadErrors = metrics.NewCounter(`vm_read_errors_total{name="graphite"}`)
graphiteUnmarshalErrors = metrics.NewCounter(`vm_unmarshal_errors_total{name="graphite"}`)
graphiteReadCalls = metrics.NewCounter(`vm_read_calls_total{name="graphite"}`)
graphiteReadErrors = metrics.NewCounter(`vm_read_errors_total{name="graphite"}`)
)
func getPushCtx() *pushCtx {

View File

@@ -4,6 +4,8 @@ import (
"fmt"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/fastjson/fastfloat"
)
@@ -41,13 +43,8 @@ func (rs *Rows) Reset() {
// See https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/
//
// s must be unchanged until rs is in use.
func (rs *Rows) Unmarshal(s string) error {
var err error
rs.Rows, rs.tagsPool, rs.fieldsPool, err = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0], rs.fieldsPool[:0])
if err != nil {
return err
}
return err
func (rs *Rows) Unmarshal(s string) {
rs.Rows, rs.tagsPool, rs.fieldsPool = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0], rs.fieldsPool[:0])
}
// Row is a single influx row.
@@ -65,9 +62,8 @@ func (r *Row) reset() {
r.Timestamp = 0
}
func (r *Row) unmarshal(s string, tagsPool []Tag, fieldsPool []Field) ([]Tag, []Field, error) {
func (r *Row) unmarshal(s string, tagsPool []Tag, fieldsPool []Field, noEscapeChars bool) ([]Tag, []Field, error) {
r.reset()
noEscapeChars := strings.IndexByte(s, '\\') < 0
n := nextUnescapedChar(s, ' ', noEscapeChars)
if n < 0 {
return tagsPool, fieldsPool, fmt.Errorf("cannot find Whitespace I in %q", s)
@@ -141,9 +137,6 @@ func (tag *Tag) unmarshal(s string, noEscapeChars bool) error {
return fmt.Errorf("missing tag value for %q", s)
}
tag.Key = unescapeTagValue(s[:n], noEscapeChars)
if len(tag.Key) == 0 {
return fmt.Errorf("tag key cannot be empty")
}
tag.Value = unescapeTagValue(s[n+1:], noEscapeChars)
return nil
}
@@ -177,41 +170,51 @@ func (f *Field) unmarshal(s string, noEscapeChars, hasQuotedFields bool) error {
return nil
}
func unmarshalRows(dst []Row, s string, tagsPool []Tag, fieldsPool []Field) ([]Row, []Tag, []Field, error) {
func unmarshalRows(dst []Row, s string, tagsPool []Tag, fieldsPool []Field) ([]Row, []Tag, []Field) {
noEscapeChars := strings.IndexByte(s, '\\') < 0
for len(s) > 0 {
n := strings.IndexByte(s, '\n')
if n == 0 {
// Skip empty line
s = s[1:]
continue
}
if cap(dst) > len(dst) {
dst = dst[:len(dst)+1]
} else {
dst = append(dst, Row{})
}
r := &dst[len(dst)-1]
if n < 0 {
// The last line.
var err error
tagsPool, fieldsPool, err = r.unmarshal(s, tagsPool, fieldsPool)
if err != nil {
err = fmt.Errorf("cannot unmarshal Influx line %q: %s", s, err)
return dst, tagsPool, fieldsPool, err
}
return dst, tagsPool, fieldsPool, nil
}
var err error
tagsPool, fieldsPool, err = r.unmarshal(s[:n], tagsPool, fieldsPool)
if err != nil {
err = fmt.Errorf("cannot unmarshal Influx line %q: %s", s[:n], err)
return dst, tagsPool, fieldsPool, err
return unmarshalRow(dst, s, tagsPool, fieldsPool, noEscapeChars)
}
dst, tagsPool, fieldsPool = unmarshalRow(dst, s[:n], tagsPool, fieldsPool, noEscapeChars)
s = s[n+1:]
}
return dst, tagsPool, fieldsPool, nil
return dst, tagsPool, fieldsPool
}
// unmarshalRow parses a single Influx line s and appends the resulting row to dst.
//
// One trailing '\r' is stripped from s; empty lines and lines starting with '#'
// (comments) are skipped. A line that cannot be parsed is logged, counted in
// invalidLines and left out of dst.
// It returns the updated dst, tagsPool and fieldsPool.
func unmarshalRow(dst []Row, s string, tagsPool []Tag, fieldsPool []Field, noEscapeChars bool) ([]Row, []Tag, []Field) {
	if n := len(s); n > 0 && s[n-1] == '\r' {
		s = s[:n-1]
	}
	if s == "" || s[0] == '#' {
		// Empty lines and comments produce no rows.
		return dst, tagsPool, fieldsPool
	}
	// Extend dst by one slot, reusing spare capacity when it is available.
	if len(dst) < cap(dst) {
		dst = dst[:len(dst)+1]
	} else {
		dst = append(dst, Row{})
	}
	last := len(dst) - 1
	tags, fields, err := dst[last].unmarshal(s, tagsPool, fieldsPool, noEscapeChars)
	if err != nil {
		// Drop the slot reserved for the invalid line.
		dst = dst[:last]
		logger.Errorf("cannot unmarshal Influx line %q: %s; skipping it", s, err)
		invalidLines.Inc()
	}
	return dst, tags, fields
}
var invalidLines = metrics.NewCounter(`vm_rows_invalid_total{type="influx"}`)
func unmarshalTags(dst []Tag, s string, noEscapeChars bool) ([]Tag, error) {
for {
if cap(dst) > len(dst) {
@@ -223,14 +226,22 @@ func unmarshalTags(dst []Tag, s string, noEscapeChars bool) ([]Tag, error) {
n := nextUnescapedChar(s, ',', noEscapeChars)
if n < 0 {
if err := tag.unmarshal(s, noEscapeChars); err != nil {
return dst, err
return dst[:len(dst)-1], err
}
if len(tag.Key) == 0 || len(tag.Value) == 0 {
// Skip empty tag
dst = dst[:len(dst)-1]
}
return dst, nil
}
if err := tag.unmarshal(s[:n], noEscapeChars); err != nil {
return dst, err
return dst[:len(dst)-1], err
}
s = s[n+1:]
if len(tag.Key) == 0 || len(tag.Value) == 0 {
// Skip empty tag
dst = dst[:len(dst)-1]
}
}
}

View File

@@ -74,13 +74,15 @@ func TestRowsUnmarshalFailure(t *testing.T) {
f := func(s string) {
t.Helper()
var rows Rows
if err := rows.Unmarshal(s); err == nil {
t.Fatalf("expecting non-nil error when parsing %q", s)
rows.Unmarshal(s)
if len(rows.Rows) != 0 {
t.Fatalf("expecting zero rows; got %d rows", len(rows.Rows))
}
// Try again
if err := rows.Unmarshal(s); err == nil {
t.Fatalf("expecting non-nil error when parsing %q", s)
rows.Unmarshal(s)
if len(rows.Rows) != 0 {
t.Fatalf("expecting zero rows; got %d rows", len(rows.Rows))
}
}
@@ -94,12 +96,8 @@ func TestRowsUnmarshalFailure(t *testing.T) {
// Missing tag value
f("foo,bar")
f("foo,bar baz")
f("foo,bar= baz")
f("foo,bar=123, 123")
// Missing tag name
f("foo,=bar baz=234")
// Missing field value
f("foo bar")
f("foo bar=")
@@ -122,17 +120,13 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
f := func(s string, rowsExpected *Rows) {
t.Helper()
var rows Rows
if err := rows.Unmarshal(s); err != nil {
t.Fatalf("cannot unmarshal %q: %s", s, err)
}
rows.Unmarshal(s)
if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
}
// Try unmarshaling again
if err := rows.Unmarshal(s); err != nil {
t.Fatalf("cannot unmarshal %q: %s", s, err)
}
rows.Unmarshal(s)
if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
}
@@ -146,6 +140,12 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
// Empty line
f("", &Rows{})
f("\n\n", &Rows{})
f("\n\r\n", &Rows{})
// Comment
f("\n# foobar\n", &Rows{})
f("#foobar baz", &Rows{})
f("#foobar baz\n#sss", &Rows{})
// Minimal line without tags and timestamp
f("foo bar=123", &Rows{
@@ -157,6 +157,15 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
}},
}},
})
f("# comment\nfoo bar=123\r\n#comment2 sdsf dsf", &Rows{
Rows: []Row{{
Measurement: "foo",
Fields: []Field{{
Key: "bar",
Value: 123,
}},
}},
})
f("foo bar=123\n", &Rows{
Rows: []Row{{
Measurement: "foo",
@@ -216,7 +225,7 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
})
// Line with empty tag values
f("foo,tag1=xyz,tagN=,tag2=43as bar=123", &Rows{
f("foo,tag1=xyz,tagN=,tag2=43as,=xxx bar=123", &Rows{
Rows: []Row{{
Measurement: "foo",
Tags: []Tag{
@@ -224,10 +233,6 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
Key: "tag1",
Value: "xyz",
},
{
Key: "tagN",
Value: "",
},
{
Key: "tag2",
Value: "43as",
@@ -309,11 +314,11 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
})
// Escape chars
f(`fo\,bar\=baz,x\==\\a\,\=\q\ \\\a\=\,=4.34`, &Rows{
f(`fo\,bar\=baz,x\=\b=\\a\,\=\q\ \\\a\=\,=4.34`, &Rows{
Rows: []Row{{
Measurement: `fo,bar=baz`,
Tags: []Tag{{
Key: `x=`,
Key: `x=\b`,
Value: `\a,=\q `,
}},
Fields: []Field{{
@@ -348,6 +353,34 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
},
},
})
// Multiple lines with invalid line in the middle.
f("foo,tag=xyz field=1.23 48934\n"+
"invalid line\n"+
"bar x=-1i\n\n", &Rows{
Rows: []Row{
{
Measurement: "foo",
Tags: []Tag{{
Key: "tag",
Value: "xyz",
}},
Fields: []Field{{
Key: "field",
Value: 1.23,
}},
Timestamp: 48934,
},
{
Measurement: "bar",
Fields: []Field{{
Key: "x",
Value: -1,
}},
},
},
})
// No newline after the second line.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/82
f("foo,tag=xyz field=1.23 48934\n"+
@@ -374,4 +407,24 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
},
},
})
f("x,y=z,g=p:\\ \\ 5432\\,\\ gp\\ mon\\ [lol]\\ con10\\ cmd5\\ SELECT f=1", &Rows{
Rows: []Row{{
Measurement: "x",
Tags: []Tag{
{
Key: "y",
Value: "z",
},
{
Key: "g",
Value: "p: 5432, gp mon [lol] con10 cmd5 SELECT",
},
},
Fields: []Field{{
Key: "f",
Value: 1,
}},
}},
})
}

View File

@@ -6,14 +6,19 @@ import (
)
func BenchmarkRowsUnmarshal(b *testing.B) {
s := `cpu usage_user=1.23,usage_system=4.34,usage_iowait=0.1112 1234556768`
s := `cpu usage_user=1.23,usage_system=4.34,usage_iowait=0.1112 1234556768
cpu usage_user=1.23,usage_system=4.34,usage_iowait=0.1112 123455676344
aaa usage_user=1.23,usage_system=4.34,usage_iowait=0.1112 123455676344
bbb usage_user=1.23,usage_system=4.34,usage_iowait=0.1112 123455676344
`
b.SetBytes(int64(len(s)))
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
var rows Rows
for pb.Next() {
if err := rows.Unmarshal(s); err != nil {
panic(fmt.Errorf("cannot unmarshal %q: %s", s, err))
rows.Unmarshal(s)
if len(rows.Rows) != 4 {
panic(fmt.Errorf("unexpected number of rows parsed; got %d; want 4", len(rows.Rows)))
}
}
})

View File

@@ -1,7 +1,6 @@
package influx
import (
"compress/gzip"
"flag"
"fmt"
"io"
@@ -41,11 +40,11 @@ func insertHandlerInternal(req *http.Request) error {
r := req.Body
if req.Header.Get("Content-Encoding") == "gzip" {
zr, err := getGzipReader(r)
zr, err := common.GetGzipReader(r)
if err != nil {
return fmt.Errorf("cannot read gzipped influx line protocol data: %s", err)
}
defer putGzipReader(zr)
defer common.PutGzipReader(zr)
r = zr
}
@@ -91,11 +90,17 @@ func (ctx *pushCtx) InsertRows(db string) error {
for i := range rows {
r := &rows[i]
ic.Labels = ic.Labels[:0]
ic.AddLabel("db", db)
hasDBLabel := false
for j := range r.Tags {
tag := &r.Tags[j]
if tag.Key == "db" {
hasDBLabel = true
}
ic.AddLabel(tag.Key, tag.Value)
}
if len(db) > 0 && !hasDBLabel {
ic.AddLabel("db", db)
}
ctx.metricNameBuf = storage.MarshalMetricNameRaw(ctx.metricNameBuf[:0], ic.Labels)
ctx.metricGroupBuf = append(ctx.metricGroupBuf[:0], r.Measurement...)
skipFieldKey := len(r.Fields) == 1 && *skipSingleField
@@ -120,25 +125,6 @@ func (ctx *pushCtx) InsertRows(db string) error {
return ic.FlushBufs()
}
func getGzipReader(r io.Reader) (*gzip.Reader, error) {
v := gzipReaderPool.Get()
if v == nil {
return gzip.NewReader(r)
}
zr := v.(*gzip.Reader)
if err := zr.Reset(r); err != nil {
return nil, err
}
return zr, nil
}
func putGzipReader(zr *gzip.Reader) {
_ = zr.Close()
gzipReaderPool.Put(zr)
}
var gzipReaderPool sync.Pool
func (ctx *pushCtx) Read(r io.Reader, tsMultiplier int64) bool {
if ctx.err != nil {
return false
@@ -151,11 +137,7 @@ func (ctx *pushCtx) Read(r io.Reader, tsMultiplier int64) bool {
}
return false
}
if err := ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf)); err != nil {
influxUnmarshalErrors.Inc()
ctx.err = fmt.Errorf("cannot unmarshal influx line protocol data with size %d: %s", len(ctx.reqBuf), err)
return false
}
ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf))
// Adjust timestamps according to tsMultiplier
currentTs := time.Now().UnixNano() / 1e6
@@ -184,9 +166,8 @@ func (ctx *pushCtx) Read(r io.Reader, tsMultiplier int64) bool {
}
var (
influxReadCalls = metrics.NewCounter(`vm_read_calls_total{name="influx"}`)
influxReadErrors = metrics.NewCounter(`vm_read_errors_total{name="influx"}`)
influxUnmarshalErrors = metrics.NewCounter(`vm_unmarshal_errors_total{name="influx"}`)
influxReadCalls = metrics.NewCounter(`vm_read_calls_total{name="influx"}`)
influxReadErrors = metrics.NewCounter(`vm_read_errors_total{name="influx"}`)
)
type pushCtx struct {

View File

@@ -10,19 +10,25 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/graphite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/influx"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/opentsdb"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/opentsdbhttp"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metrics"
)
var (
graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpentTSDB put messages. Usually :4242 must be set. Doesn't work if empty")
maxInsertRequestSize = flag.Int("maxInsertRequestSize", 32*1024*1024, "The maximum size of a single insert request in bytes")
graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpentTSDB put messages. Usually :4242 must be set. Doesn't work if empty")
opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty")
maxInsertRequestSize = flag.Int("maxInsertRequestSize", 32*1024*1024, "The maximum size of a single insert request in bytes")
maxLabelsPerTimeseries = flag.Int("maxLabelsPerTimeseries", 30, "The maximum number of labels accepted per time series. Superflouos labels are dropped")
)
// Init initializes vminsert.
func Init() {
storage.SetMaxLabelsPerTimeseries(*maxLabelsPerTimeseries)
concurrencylimiter.Init()
if len(*graphiteListenAddr) > 0 {
go graphite.Serve(*graphiteListenAddr)
@@ -30,6 +36,9 @@ func Init() {
if len(*opentsdbListenAddr) > 0 {
go opentsdb.Serve(*opentsdbListenAddr)
}
if len(*opentsdbHTTPListenAddr) > 0 {
go opentsdbhttp.Serve(*opentsdbHTTPListenAddr, int64(*maxInsertRequestSize))
}
}
// Stop stops vminsert.
@@ -40,6 +49,9 @@ func Stop() {
if len(*opentsdbListenAddr) > 0 {
opentsdb.Stop()
}
if len(*opentsdbHTTPListenAddr) > 0 {
opentsdbhttp.Stop()
}
}
// RequestHandler is a handler for Prometheus remote storage write API

View File

@@ -4,6 +4,8 @@ import (
"fmt"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/fastjson/fastfloat"
)
@@ -34,13 +36,8 @@ func (rs *Rows) Reset() {
// See http://opentsdb.net/docs/build/html/api_telnet/put.html
//
// s must be unchanged until rs is in use.
func (rs *Rows) Unmarshal(s string) error {
var err error
rs.Rows, rs.tagsPool, err = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0])
if err != nil {
return err
}
return err
func (rs *Rows) Unmarshal(s string) {
rs.Rows, rs.tagsPool = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0])
}
// Row is a single OpenTSDB row.
@@ -69,6 +66,9 @@ func (r *Row) unmarshal(s string, tagsPool []Tag) ([]Tag, error) {
return tagsPool, fmt.Errorf("cannot find whitespace between metric and timestamp in %q", s)
}
r.Metric = s[:n]
if len(r.Metric) == 0 {
return tagsPool, fmt.Errorf("metric cannot be empty")
}
tail := s[n+1:]
n = strings.IndexByte(tail, ' ')
if n < 0 {
@@ -92,41 +92,46 @@ func (r *Row) unmarshal(s string, tagsPool []Tag) ([]Tag, error) {
return tagsPool, nil
}
func unmarshalRows(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag, error) {
func unmarshalRows(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag) {
for len(s) > 0 {
n := strings.IndexByte(s, '\n')
if n == 0 {
// Skip empty line
s = s[1:]
continue
}
if cap(dst) > len(dst) {
dst = dst[:len(dst)+1]
} else {
dst = append(dst, Row{})
}
r := &dst[len(dst)-1]
if n < 0 {
// The last line.
var err error
tagsPool, err = r.unmarshal(s, tagsPool)
if err != nil {
err = fmt.Errorf("cannot unmarshal OpenTSDB line %q: %s", s, err)
return dst, tagsPool, err
}
return dst, tagsPool, nil
}
var err error
tagsPool, err = r.unmarshal(s[:n], tagsPool)
if err != nil {
err = fmt.Errorf("cannot unmarshal OpenTSDB line %q: %s", s[:n], err)
return dst, tagsPool, err
return unmarshalRow(dst, s, tagsPool)
}
dst, tagsPool = unmarshalRow(dst, s[:n], tagsPool)
s = s[n+1:]
}
return dst, tagsPool, nil
return dst, tagsPool
}
// unmarshalRow parses a single OpenTSDB line s and appends the resulting row to dst.
//
// One trailing '\r' is stripped from s and empty lines are skipped.
// A line that cannot be parsed is logged, counted in invalidLines and left out of dst.
// It returns the updated dst and tagsPool.
func unmarshalRow(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag) {
	if n := len(s); n > 0 && s[n-1] == '\r' {
		s = s[:n-1]
	}
	if s == "" {
		// Nothing to parse on an empty line.
		return dst, tagsPool
	}
	// Extend dst by one slot, reusing spare capacity when it is available.
	if len(dst) < cap(dst) {
		dst = dst[:len(dst)+1]
	} else {
		dst = append(dst, Row{})
	}
	last := len(dst) - 1
	pool, err := dst[last].unmarshal(s, tagsPool)
	if err != nil {
		// Drop the slot reserved for the invalid line.
		dst = dst[:last]
		logger.Errorf("cannot unmarshal OpenTSDB line %q: %s", s, err)
		invalidLines.Inc()
	}
	return dst, pool
}
var invalidLines = metrics.NewCounter(`vm_rows_invalid_total{type="opentsdb"}`)
func unmarshalTags(dst []Tag, s string) ([]Tag, error) {
for {
if cap(dst) > len(dst) {
@@ -142,12 +147,20 @@ func unmarshalTags(dst []Tag, s string) ([]Tag, error) {
if err := tag.unmarshal(s); err != nil {
return dst[:len(dst)-1], err
}
if len(tag.Key) == 0 || len(tag.Value) == 0 {
// Skip empty tag
dst = dst[:len(dst)-1]
}
return dst, nil
}
if err := tag.unmarshal(s[:n]); err != nil {
return dst[:len(dst)-1], err
}
s = s[n+1:]
if len(tag.Key) == 0 || len(tag.Value) == 0 {
// Skip empty tag
dst = dst[:len(dst)-1]
}
}
}
@@ -169,9 +182,6 @@ func (t *Tag) unmarshal(s string) error {
return fmt.Errorf("missing tag value for %q", s)
}
t.Key = s[:n]
if len(t.Key) == 0 {
return fmt.Errorf("tag key cannot be empty for %q", s)
}
t.Value = s[n+1:]
return nil
}

View File

@@ -9,19 +9,24 @@ func TestRowsUnmarshalFailure(t *testing.T) {
f := func(s string) {
t.Helper()
var rows Rows
if err := rows.Unmarshal(s); err == nil {
t.Fatalf("expecting non-nil error when parsing %q", s)
rows.Unmarshal(s)
if len(rows.Rows) != 0 {
t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
}
// Try again
if err := rows.Unmarshal(s); err == nil {
t.Fatalf("expecting non-nil error when parsing %q", s)
rows.Unmarshal(s)
if len(rows.Rows) != 0 {
t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
}
}
// Missing put prefix
f("xx")
// Missing metric
f("put 111 34")
// Missing timestamp
f("put aaa")
@@ -42,26 +47,19 @@ func TestRowsUnmarshalFailure(t *testing.T) {
// Invalid tag
f("put aaa 123 4.5 foo")
f("put aaa 123 4.5 =")
f("put aaa 123 4.5 =foo")
f("put aaa 123 4.5 =foo a=b")
}
func TestRowsUnmarshalSuccess(t *testing.T) {
f := func(s string, rowsExpected *Rows) {
t.Helper()
var rows Rows
if err := rows.Unmarshal(s); err != nil {
t.Fatalf("cannot unmarshal %q: %s", s, err)
}
rows.Unmarshal(s)
if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
}
// Try unmarshaling again
if err := rows.Unmarshal(s); err != nil {
t.Fatalf("cannot unmarshal %q: %s", s, err)
}
rows.Unmarshal(s)
if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
}
@@ -74,7 +72,9 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
// Empty line
f("", &Rows{})
f("\r", &Rows{})
f("\n\n", &Rows{})
f("\n\r\n", &Rows{})
// Single line
f("put foobar 789 -123.456 a=b", &Rows{
@@ -88,17 +88,13 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
}},
}},
})
// Empty tag value
f("put foobar 789 -123.456 a= b=c", &Rows{
// Empty tag
f("put foobar 789 -123.456 a= b=c =d", &Rows{
Rows: []Row{{
Metric: "foobar",
Value: -123.456,
Timestamp: 789,
Tags: []Tag{
{
Key: "a",
Value: "",
},
{
Key: "b",
Value: "c",
@@ -200,4 +196,27 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
},
},
})
// Multi lines with invalid line
f("put foo 2 0.3 a=b\naaa bbb\nput bar.baz 43 0.34 a=b\n", &Rows{
Rows: []Row{
{
Metric: "foo",
Value: 0.3,
Timestamp: 2,
Tags: []Tag{{
Key: "a",
Value: "b",
}},
},
{
Metric: "bar.baz",
Value: 0.34,
Timestamp: 43,
Tags: []Tag{{
Key: "a",
Value: "b",
}},
},
},
})
}

View File

@@ -6,18 +6,19 @@ import (
)
func BenchmarkRowsUnmarshal(b *testing.B) {
s := `cpu.usage_user 1234556768 1.23 a=b
cpu.usage_system 1234556768 23.344 a=b
cpu.usage_iowait 1234556769 3.3443 a=b
cpu.usage_irq 1234556768 0.34432 a=b
s := `put cpu.usage_user 1234556768 1.23 a=b
put cpu.usage_system 1234556768 23.344 a=b
put cpu.usage_iowait 1234556769 3.3443 a=b
put cpu.usage_irq 1234556768 0.34432 a=b
`
b.SetBytes(int64(len(s)))
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
var rows Rows
for pb.Next() {
if err := rows.Unmarshal(s); err != nil {
panic(fmt.Errorf("cannot unmarshal %q: %s", s, err))
rows.Unmarshal(s)
if len(rows.Rows) != 4 {
panic(fmt.Errorf("unexpected number of parsed rows; got %d; want 4", len(rows.Rows)))
}
}
})

View File

@@ -85,15 +85,21 @@ func (ctx *pushCtx) Read(r io.Reader) bool {
return false
}
}
if err := ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf)); err != nil {
opentsdbUnmarshalErrors.Inc()
ctx.err = fmt.Errorf("cannot unmarshal OpenTSDB put protocol data with size %d: %s", len(ctx.reqBuf), err)
return false
ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf))
// Fill in missing timestamps
currentTimestamp := time.Now().Unix()
rows := ctx.Rows.Rows
for i := range rows {
r := &rows[i]
if r.Timestamp == 0 {
r.Timestamp = currentTimestamp
}
}
// Convert timestamps from seconds to milliseconds
for i := range ctx.Rows.Rows {
ctx.Rows.Rows[i].Timestamp *= 1e3
for i := range rows {
rows[i].Timestamp *= 1e3
}
return true
}
@@ -125,9 +131,8 @@ func (ctx *pushCtx) reset() {
}
var (
opentsdbReadCalls = metrics.NewCounter(`vm_read_calls_total{name="opentsdb"}`)
opentsdbReadErrors = metrics.NewCounter(`vm_read_errors_total{name="opentsdb"}`)
opentsdbUnmarshalErrors = metrics.NewCounter(`vm_unmarshal_errors_total{name="opentsdb"}`)
opentsdbReadCalls = metrics.NewCounter(`vm_read_calls_total{name="opentsdb"}`)
opentsdbReadErrors = metrics.NewCounter(`vm_read_errors_total{name="opentsdb"}`)
)
func getPushCtx() *pushCtx {

View File

@@ -0,0 +1,198 @@
package opentsdbhttp
import (
	"fmt"
	"strconv"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
	"github.com/VictoriaMetrics/metrics"
	"github.com/valyala/fastjson"
	"github.com/valyala/fastjson/fastfloat"
)
// Rows contains parsed OpenTSDB rows.
//
// A Rows value may be reused across Unmarshal calls: both slices are
// truncated and refilled, so their backing arrays are retained.
type Rows struct {
	// Rows holds the rows produced by the last Unmarshal call.
	Rows []Row

	// tagsPool is the shared backing storage that the Tags slice of every
	// row in Rows points into.
	tagsPool []Tag
}
// Reset clears rs so it can be reused.
//
// Every element is zeroed before the slices are truncated, so the strings
// and slices they reference can be garbage collected while the backing
// arrays of rs.Rows and rs.tagsPool are kept.
func (rs *Rows) Reset() {
	rows := rs.Rows
	for i := range rows {
		rows[i].reset()
	}
	rs.Rows = rows[:0]

	tags := rs.tagsPool
	for i := range tags {
		tags[i].reset()
	}
	rs.tagsPool = tags[:0]
}
// Unmarshal unmarshals OpenTSDB rows from av.
//
// av may be either a single JSON object or a JSON array of objects.
// Invalid entries are logged and skipped instead of being reported
// to the caller, so the method has no error result.
//
// See http://opentsdb.net/docs/build/html/api_http/put.html
//
// av must remain unchanged while rs is in use, since the parsed rows
// refer to its data.
func (rs *Rows) Unmarshal(av *fastjson.Value) {
	// Reuse the existing backing arrays of both slices.
	rs.Rows, rs.tagsPool = unmarshalRows(rs.Rows[:0], av, rs.tagsPool[:0])
}
// Row is a single OpenTSDB row.
type Row struct {
	// Metric is the value of the `metric` field. It is never empty for a parsed row.
	Metric string

	// Tags contains the non-empty key/value pairs from the `tags` object.
	// It is nil when the `tags` field is missing.
	Tags []Tag

	// Value is the value of the `value` field.
	Value float64

	// Timestamp is the `timestamp` field truncated to an integer.
	// It is 0 when the field is missing from the input.
	Timestamp int64
}
// reset returns r to its zero state, dropping references to the metric
// name and tags.
func (r *Row) reset() {
	*r = Row{}
}
// unmarshal fills r from the JSON object o.
//
// Tags are appended to tagsPool and r.Tags is set to the appended region.
// The possibly grown tagsPool is always returned, including on error.
//
// `metric` and `value` are mandatory; `timestamp` and `tags` are optional.
func (r *Row) unmarshal(o *fastjson.Value, tagsPool []Tag) ([]Tag, error) {
	r.reset()

	metric := o.GetStringBytes("metric")
	if len(metric) == 0 {
		return tagsPool, fmt.Errorf("missing `metric` in %s", o)
	}
	r.Metric = bytesutil.ToUnsafeString(metric)

	// A missing timestamp is allowed: r.reset() above already left it at 0,
	// and the caller substitutes the current time for zero timestamps.
	if tsValue := o.Get("timestamp"); tsValue != nil {
		ts, err := getFloat64(tsValue)
		if err != nil {
			return tagsPool, fmt.Errorf("invalid `timestamp` in %s: %s", o, err)
		}
		r.Timestamp = int64(ts)
	}

	valueValue := o.Get("value")
	if valueValue == nil {
		return tagsPool, fmt.Errorf("missing `value` in %s", o)
	}
	value, err := getFloat64(valueValue)
	if err != nil {
		return tagsPool, fmt.Errorf("invalid `value` in %s: %s", o, err)
	}
	r.Value = value

	tagsValue := o.Get("tags")
	if tagsValue == nil {
		// Rows without tags are allowed.
		return tagsPool, nil
	}
	tagsObj, err := tagsValue.Object()
	if err != nil {
		return tagsPool, fmt.Errorf("invalid `tags` in %s: %s", o, err)
	}

	start := len(tagsPool)
	tagsPool, err = unmarshalTags(tagsPool, tagsObj)
	if err != nil {
		return tagsPool, fmt.Errorf("cannot parse tags %s: %s", tagsObj, err)
	}
	// Cap the slice at its length, so appending to r.Tags cannot overwrite
	// tags of the rows that follow in tagsPool.
	end := len(tagsPool)
	r.Tags = tagsPool[start:end:end]
	return tagsPool, nil
}
// getFloat64 returns the float64 stored in v.
//
// v may be either a JSON number or a JSON string holding a decimal number.
// An error is returned for any other JSON type and for strings that
// do not contain a valid number.
func getFloat64(v *fastjson.Value) (float64, error) {
	switch v.Type() {
	case fastjson.TypeNumber:
		return v.Float64()
	case fastjson.TypeString:
		vStr, _ := v.StringBytes()
		s := bytesutil.ToUnsafeString(vStr)
		vFloat := fastfloat.ParseBestEffort(s)
		if vFloat != 0 {
			return vFloat, nil
		}
		// ParseBestEffort returns 0 both for a real zero and for unparsable
		// input. Disambiguate with the strict parser, so every spelling of
		// zero ("0.00", "-0", "0e0", ...) is accepted, not only "0" and "0.0".
		f, err := strconv.ParseFloat(s, 64)
		if err != nil || f != 0 {
			return 0, fmt.Errorf("invalid float64 value: %q", vStr)
		}
		return f, nil
	default:
		return 0, fmt.Errorf("value doesn't contain float64; it contains %s", v.Type())
	}
}
// unmarshalRows appends the rows found in av to dst and returns
// the updated dst and tagsPool.
//
// av must be either a single JSON object or an array of objects. Any other
// JSON type is logged, counted in invalidLines and otherwise ignored.
func unmarshalRows(dst []Row, av *fastjson.Value, tagsPool []Tag) ([]Row, []Tag) {
	t := av.Type()
	if t == fastjson.TypeObject {
		return unmarshalRow(dst, av, tagsPool)
	}
	if t == fastjson.TypeArray {
		items, _ := av.Array()
		for i := range items {
			dst, tagsPool = unmarshalRow(dst, items[i], tagsPool)
		}
		return dst, tagsPool
	}
	logger.Errorf("OpenTSDB JSON must be either object or array; got %s; body=%s", t, av)
	invalidLines.Inc()
	return dst, tagsPool
}
// unmarshalRow parses the JSON object o into a new row at the end of dst.
//
// When o cannot be parsed, the error is logged, invalidLines is incremented
// and dst is returned with its original length. tagsPool is returned
// as left by the parser in both cases.
func unmarshalRow(dst []Row, o *fastjson.Value, tagsPool []Tag) ([]Row, []Tag) {
	n := len(dst)
	if n < cap(dst) {
		// Reuse the spare element; Row.unmarshal resets it before use.
		dst = dst[:n+1]
	} else {
		dst = append(dst, Row{})
	}

	pool, err := dst[n].unmarshal(o, tagsPool)
	if err == nil {
		return dst, pool
	}
	logger.Errorf("cannot unmarshal OpenTSDB object %s: %s", o, err)
	invalidLines.Inc()
	return dst[:n], pool
}
// invalidLines counts the OpenTSDB HTTP entries that were skipped
// because they could not be unmarshaled.
var invalidLines = metrics.NewCounter(`vm_rows_invalid_total{type="opentsdb-http"}`)
// unmarshalTags appends the tags from the JSON object o to dst.
//
// Tags with an empty key or an empty value are skipped.
// A non-string tag value results in a non-nil error; the remaining
// members of o are still visited in this case.
func unmarshalTags(dst []Tag, o *fastjson.Object) ([]Tag, error) {
	var visitErr error
	o.Visit(func(key []byte, val *fastjson.Value) {
		if t := val.Type(); t != fastjson.TypeString {
			visitErr = fmt.Errorf("tag value must be string; got %s; value=%s", t, val)
			return
		}
		value, _ := val.StringBytes()
		if len(key) == 0 || len(value) == 0 {
			// Skip empty tags
			return
		}
		dst = append(dst, Tag{
			Key:   bytesutil.ToUnsafeString(key),
			Value: bytesutil.ToUnsafeString(value),
		})
	})
	return dst, visitErr
}
// Tag is an OpenTSDB tag.
//
// Parsed tags always have a non-empty Key and a non-empty Value,
// since empty ones are skipped during unmarshaling.
type Tag struct {
	Key   string
	Value string
}
// reset clears t, dropping its references to the key and value strings.
func (t *Tag) reset() {
	*t = Tag{}
}

View File

@@ -0,0 +1,246 @@
package opentsdbhttp
import (
"reflect"
"testing"
)
// TestRowsUnmarshalFailure verifies that invalid input produces no rows.
//
// Input that is rejected by the JSON parser itself is accepted as a failure;
// for syntactically valid JSON, Rows.Unmarshal must leave rows.Rows empty.
func TestRowsUnmarshalFailure(t *testing.T) {
	f := func(s string) {
		t.Helper()
		var rows Rows
		p := parserPool.Get()
		defer parserPool.Put(p)
		v, err := p.Parse(s)
		if err != nil {
			// Expected JSON parser error
			return
		}
		// Verify OpenTSDB body parsing error
		rows.Unmarshal(v)
		if len(rows.Rows) != 0 {
			t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
		}

		// Try again, to check that reusing rows gives the same result
		rows.Unmarshal(v)
		if len(rows.Rows) != 0 {
			t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
		}
	}

	// invalid json
	f("{g")

	// Invalid json type
	f(`1`)
	f(`"foo"`)
	f(`[1,2]`)
	f(`null`)

	// Incomplete object
	f(`{}`)
	f(`{"metric": "aaa"}`)
	f(`{"metric": "aaa", "timestamp": 1122}`)
	f(`{"metric": "aaa", "timestamp": "tststs"}`)
	f(`{"timestamp": 1122, "value": 33}`)
	f(`{"value": 33}`)
	f(`{"value": 33, "tags": {"fooo":"bar"}}`)

	// Invalid value
	f(`{"metric": "aaa", "timestamp": 1122, "value": "0.0.0"}`)

	// Invalid metric type
	f(`{"metric": "", "timestamp": 1122, "value": 0.45, "tags": {"foo": "bar"}}`)
	f(`{"metric": ["aaa"], "timestamp": 1122, "value": 0.45, "tags": {"foo": "bar"}}`)
	f(`{"metric": {"aaa":1}, "timestamp": 1122, "value": 0.45, "tags": {"foo": "bar"}}`)
	f(`{"metric": 1, "timestamp": 1122, "value": 0.45, "tags": {"foo": "bar"}}`)

	// Invalid timestamp type
	f(`{"metric": "aaa", "timestamp": "foobar", "value": 0.45, "tags": {"foo": "bar"}}`)
	f(`{"metric": "aaa", "timestamp": [1,2], "value": 0.45, "tags": {"foo": "bar"}}`)
	f(`{"metric": "aaa", "timestamp": {"a":1}, "value": 0.45, "tags": {"foo": "bar"}}`)

	// Invalid value type
	f(`{"metric": "aaa", "timestamp": 1122, "value": [0,1], "tags": {"foo":"bar"}}`)
	f(`{"metric": "aaa", "timestamp": 1122, "value": {"a":1}, "tags": {"foo":"bar"}}`)
	f(`{"metric": "aaa", "timestamp": 1122, "value": "foobar", "tags": {"foo":"bar"}}`)

	// Invalid tags type
	f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": 1}`)
	f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": [1,2]}`)
	f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": "foo"}`)

	// Invalid tag value type
	f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": {"foo": ["bar"]}}`)
	f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": {"foo": {"bar":"baz"}}}`)
	f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": {"foo": 1}}`)

	// Invalid multiline: every object in the array is invalid
	f(`[{"metric": "aaa", "timestamp": 1122, "value": "trt", "tags":{"foo":"bar"}}, {"metric": "aaa", "timestamp": [1122], "value": 111}]`)
}
// TestRowsUnmarshalSuccess verifies that valid input is parsed into
// the expected rows, that parsing the same value twice into the same Rows
// gives the same result, and that Reset empties the rows.
func TestRowsUnmarshalSuccess(t *testing.T) {
	f := func(s string, rowsExpected *Rows) {
		t.Helper()
		var rows Rows

		p := parserPool.Get()
		defer parserPool.Put(p)
		v, err := p.Parse(s)
		if err != nil {
			t.Fatalf("cannot parse json %s: %s", s, err)
		}
		rows.Unmarshal(v)
		if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
			t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
		}

		// Try unmarshaling again
		rows.Unmarshal(v)
		if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
			t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
		}

		rows.Reset()
		if len(rows.Rows) != 0 {
			t.Fatalf("non-empty rows after reset: %+v", rows.Rows)
		}
	}

	// Normal line
	f(`{"metric": "foobar", "timestamp": 789, "value": -123.456, "tags": {"a":"b"}}`, &Rows{
		Rows: []Row{{
			Metric:    "foobar",
			Value:     -123.456,
			Timestamp: 789,
			Tags: []Tag{{
				Key:   "a",
				Value: "b",
			}},
		}},
	})

	// Timestamp as string
	f(`{"metric": "foobar", "timestamp": "1789", "value": -123.456, "tags": {"a":"b"}}`, &Rows{
		Rows: []Row{{
			Metric:    "foobar",
			Value:     -123.456,
			Timestamp: 1789,
			Tags: []Tag{{
				Key:   "a",
				Value: "b",
			}},
		}},
	})

	// Timestamp as float64 (it is truncated to integer)
	f(`{"metric": "foobar", "timestamp": 17.89, "value": -123.456, "tags": {"a":"b"}}`, &Rows{
		Rows: []Row{{
			Metric:    "foobar",
			Value:     -123.456,
			Timestamp: 17,
			Tags: []Tag{{
				Key:   "a",
				Value: "b",
			}},
		}},
	})

	// Empty tags
	f(`{"metric": "foobar", "timestamp": 789, "value": -123.456, "tags": {}}`, &Rows{
		Rows: []Row{{
			Metric:    "foobar",
			Value:     -123.456,
			Timestamp: 789,
			Tags:      nil,
		}},
	})

	// Missing tags
	f(`{"metric": "foobar", "timestamp": 789, "value": -123.456}`, &Rows{
		Rows: []Row{{
			Metric:    "foobar",
			Value:     -123.456,
			Timestamp: 789,
			Tags:      nil,
		}},
	})

	// Empty tag key or value (such tags are skipped)
	f(`{"metric": "foobar", "timestamp": 123, "value": -123.456, "tags": {"a":"", "b":"c", "": "d"}}`, &Rows{
		Rows: []Row{{
			Metric:    "foobar",
			Value:     -123.456,
			Timestamp: 123,
			Tags: []Tag{
				{
					Key:   "b",
					Value: "c",
				},
			},
		}},
	})

	// Value as string
	f(`{"metric": "foobar", "timestamp": 789, "value": "-12.456", "tags": {"a":"b"}}`, &Rows{
		Rows: []Row{{
			Metric:    "foobar",
			Value:     -12.456,
			Timestamp: 789,
			Tags: []Tag{{
				Key:   "a",
				Value: "b",
			}},
		}},
	})

	// Missing timestamp (it is left at 0 by the parser)
	f(`{"metric": "foobar", "value": "-12.456", "tags": {"a":"b"}}`, &Rows{
		Rows: []Row{{
			Metric:    "foobar",
			Value:     -12.456,
			Timestamp: 0,
			Tags: []Tag{{
				Key:   "a",
				Value: "b",
			}},
		}},
	})

	// Multiple tags
	f(`{"metric": "foo", "value": 1, "timestamp": 2, "tags": {"bar":"baz", "x": "y"}}`, &Rows{
		Rows: []Row{{
			Metric: "foo",
			Tags: []Tag{
				{
					Key:   "bar",
					Value: "baz",
				},
				{
					Key:   "x",
					Value: "y",
				},
			},
			Value:     1,
			Timestamp: 2,
		}},
	})

	// Multi lines
	f(`[{"metric": "foo", "value": "0.3", "timestamp": 2, "tags": {"a":"b"}},
{"metric": "bar.baz", "value": 0.34, "timestamp": 43, "tags": {"a":"b"}}]`, &Rows{
		Rows: []Row{
			{
				Metric:    "foo",
				Value:     0.3,
				Timestamp: 2,
				Tags: []Tag{{
					Key:   "a",
					Value: "b",
				}},
			},
			{
				Metric:    "bar.baz",
				Value:     0.34,
				Timestamp: 43,
				Tags: []Tag{{
					Key:   "a",
					Value: "b",
				}},
			},
		},
	})
}

View File

@@ -0,0 +1,33 @@
package opentsdbhttp
import (
"fmt"
"testing"
"github.com/valyala/fastjson"
)
// BenchmarkRowsUnmarshal measures parsing of a four-row JSON array,
// including the JSON parsing step itself.
//
// Each parallel worker reuses its own Rows and fastjson.Parser
// across iterations.
func BenchmarkRowsUnmarshal(b *testing.B) {
	s := `[{"metric": "cpu.usage_user", "timestamp": 1234556768, "value": 1.23, "tags": {"a":"b", "x": "y"}},
{"metric": "cpu.usage_system", "timestamp": 1234556768, "value": 23.344, "tags": {"a":"b"}},
{"metric": "cpu.usage_iowait", "timestamp": 1234556769, "value":3.3443, "tags": {"a":"b"}},
{"metric": "cpu.usage_irq", "timestamp": 1234556768, "value": 0.34432, "tags": {"a":"b"}}
]
`
	b.SetBytes(int64(len(s)))
	b.ReportAllocs()
	b.RunParallel(func(pb *testing.PB) {
		var rows Rows
		var p fastjson.Parser
		for pb.Next() {
			v, err := p.Parse(s)
			if err != nil {
				panic(fmt.Errorf("cannot parse %q: %s", s, err))
			}
			rows.Unmarshal(v)
			// Invalid rows are silently skipped by Unmarshal, so check the count.
			if len(rows.Rows) != 4 {
				panic(fmt.Errorf("unexpected number of rows unmarshaled; got %d; want 4", len(rows.Rows)))
			}
		}
	})
}

View File

@@ -0,0 +1,150 @@
package opentsdbhttp
import (
"fmt"
"io"
"net/http"
"runtime"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/fastjson"
)
// Metrics updated by the HTTP OpenTSDB insert handler.
var (
	// rowsInserted is the total number of rows passed to the storage.
	rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="opentsdb-http"}`)
	// rowsPerInsert tracks the number of rows per request.
	rowsPerInsert = metrics.NewSummary(`vm_rows_per_insert{type="opentsdb-http"}`)

	// opentsdbReadCalls counts the handled insert requests.
	opentsdbReadCalls = metrics.NewCounter(`vm_read_calls_total{name="opentsdb-http"}`)
	// opentsdbReadErrors counts the requests whose body could not be read
	// or exceeded the size limit.
	opentsdbReadErrors = metrics.NewCounter(`vm_read_errors_total{name="opentsdb-http"}`)
	// opentsdbUnmarshalErrors counts the requests whose body is not valid JSON.
	opentsdbUnmarshalErrors = metrics.NewCounter(`vm_unmarshal_errors_total{name="opentsdb-http"}`)
)
// insertHandler processes HTTP OpenTSDB put requests.
//
// The actual work is done by insertHandlerInternal, which is run
// via concurrencylimiter.Do. maxSize is the maximum allowed size
// of the (decompressed) request body in bytes.
//
// See http://opentsdb.net/docs/build/html/api_http/put.html
func insertHandler(req *http.Request, maxSize int64) error {
	return concurrencylimiter.Do(func() error {
		return insertHandlerInternal(req, maxSize)
	})
}
// insertHandlerInternal reads the body of req, parses it as OpenTSDB JSON
// and writes the resulting rows to the storage.
//
// The body is transparently decompressed when `Content-Encoding: gzip` is set.
// An error is returned when the body cannot be read, exceeds maxSize bytes,
// is not valid JSON, or when flushing the rows fails. Individual invalid rows
// are skipped by Rows.Unmarshal and do not fail the request.
func insertHandlerInternal(req *http.Request, maxSize int64) error {
	opentsdbReadCalls.Inc()

	body := req.Body
	if req.Header.Get("Content-Encoding") == "gzip" {
		zr, err := common.GetGzipReader(body)
		if err != nil {
			opentsdbReadErrors.Inc()
			return fmt.Errorf("cannot read gzipped http protocol data: %s", err)
		}
		defer common.PutGzipReader(zr)
		body = zr
	}

	ctx := getPushCtx()
	defer putPushCtx(ctx)

	// Read one byte more than allowed, so oversized requests can be detected.
	n, err := ctx.reqBuf.ReadFrom(io.LimitReader(body, maxSize+1))
	if err != nil {
		opentsdbReadErrors.Inc()
		return fmt.Errorf("cannot read HTTP OpenTSDB request: %s", err)
	}
	if n > maxSize {
		opentsdbReadErrors.Inc()
		return fmt.Errorf("too big HTTP OpenTSDB request; mustn't exceed %d bytes", maxSize)
	}

	// Parse the request body into ctx.Rows.
	p := parserPool.Get()
	defer parserPool.Put(p)
	v, err := p.ParseBytes(ctx.reqBuf.B)
	if err != nil {
		opentsdbUnmarshalErrors.Inc()
		return fmt.Errorf("cannot parse HTTP OpenTSDB json: %s", err)
	}
	ctx.Rows.Unmarshal(v)

	// Normalize timestamps: substitute the current time for missing ones,
	// then convert second-precision timestamps to milliseconds.
	// See http://opentsdb.net/docs/javadoc/net/opentsdb/core/Const.html#SECOND_MASK
	now := time.Now().Unix()
	rows := ctx.Rows.Rows
	for i := range rows {
		row := &rows[i]
		if row.Timestamp == 0 {
			row.Timestamp = now
		}
		if row.Timestamp&secondMask == 0 {
			row.Timestamp *= 1e3
		}
	}

	// Write the rows to the storage.
	ic := &ctx.Common
	ic.Reset(len(rows))
	for i := range rows {
		row := &rows[i]
		ic.Labels = ic.Labels[:0]
		// The metric name is stored under the empty label name.
		ic.AddLabel("", row.Metric)
		for j := range row.Tags {
			ic.AddLabel(row.Tags[j].Key, row.Tags[j].Value)
		}
		ic.WriteDataPoint(nil, ic.Labels, row.Timestamp, row.Value)
	}
	rowsInserted.Add(len(rows))
	rowsPerInsert.Update(float64(len(rows)))
	return ic.FlushBufs()
}
// secondMask selects the bits above the low 32 bits of a non-negative
// timestamp. A timestamp with none of these bits set is treated as
// being in seconds and is converted to milliseconds.
const secondMask int64 = 0x7FFFFFFF00000000

// parserPool is the pool of JSON parsers shared by the insert requests.
var parserPool fastjson.ParserPool
// pushCtx holds the reusable per-request state of the insert handler.
type pushCtx struct {
	// Rows contains the rows parsed from reqBuf.
	Rows Rows
	// Common is the context used for writing the rows to the storage.
	Common common.InsertCtx

	// reqBuf holds the request body.
	reqBuf bytesutil.ByteBuffer
}
// reset clears all the fields of ctx, so it can be reused for another request.
func (ctx *pushCtx) reset() {
	ctx.Rows.Reset()
	ctx.Common.Reset(0)
	ctx.reqBuf.Reset()
}
// getPushCtx returns a pushCtx for handling a single request.
//
// A context is taken from pushCtxPoolCh when one is available, then from
// pushCtxPool; a new one is allocated only when both are empty.
// The returned context must be released with putPushCtx.
func getPushCtx() *pushCtx {
	select {
	case ctx := <-pushCtxPoolCh:
		return ctx
	default:
	}
	if v := pushCtxPool.Get(); v != nil {
		return v.(*pushCtx)
	}
	return &pushCtx{}
}
// putPushCtx resets ctx and returns it for reuse by getPushCtx.
//
// ctx goes to pushCtxPoolCh when there is room in it,
// otherwise to pushCtxPool. ctx must not be used after the call.
func putPushCtx(ctx *pushCtx) {
	ctx.reset()
	select {
	case pushCtxPoolCh <- ctx:
		return
	default:
	}
	pushCtxPool.Put(ctx)
}
// pushCtxPool holds the pushCtx values that do not fit into pushCtxPoolCh.
var pushCtxPool sync.Pool

// pushCtxPoolCh is the primary free list of pushCtx values.
// Its capacity is taken from runtime.GOMAXPROCS.
var pushCtxPoolCh = make(chan *pushCtx, runtime.GOMAXPROCS(-1))

View File

@@ -0,0 +1,70 @@
package opentsdbhttp
import (
"context"
"net/http"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
)
// Metrics for the /api/put endpoint.
var (
	// writeRequests counts all the requests to /api/put.
	writeRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/put", protocol="opentsdb-http"}`)
	// writeErrors counts the requests to /api/put that ended with an error.
	writeErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/put", protocol="opentsdb-http"}`)
)

// Server state, which is set by Serve and read by Stop and requestHandler.
var (
	httpServer     *http.Server
	httpAddr       string
	maxRequestSize int64
)
// Serve starts HTTP OpenTSDB server on the given addr.
//
// maxReqSize is the maximum allowed size of a single request body in bytes.
// The server runs in a background goroutine, so Serve returns immediately.
// Any serving error except http.ErrServerClosed is fatal.
func Serve(addr string, maxReqSize int64) {
	logger.Infof("starting HTTP OpenTSDB server at %q", addr)
	httpAddr = addr
	maxRequestSize = maxReqSize
	httpServer = &http.Server{
		Addr:         addr,
		Handler:      http.HandlerFunc(requestHandler),
		ReadTimeout:  30 * time.Second,
		WriteTimeout: 10 * time.Second,
	}
	go func() {
		// http.ErrServerClosed is returned after Shutdown, i.e. on graceful stop.
		if err := httpServer.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			logger.Fatalf("FATAL: error serving HTTP OpenTSDB: %s", err)
		}
	}()
}
// requestHandler handles HTTP OpenTSDB insert request.
func requestHandler(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case "/api/put":
writeRequests.Inc()
if err := insertHandler(r, maxRequestSize); err != nil {
writeErrors.Inc()
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
return
}
w.WriteHeader(http.StatusNoContent)
default:
httpserver.Errorf(w, "unexpected path requested on HTTP OpenTSDB server: %q", r.URL.Path)
}
}
// Stop stops HTTP OpenTSDB server.
//
// The server started by Serve is given up to 5 seconds for graceful
// shutdown; a failure to shut down in time is fatal.
func Stop() {
	logger.Infof("stopping HTTP OpenTSDB server at %q...", httpAddr)

	shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	err := httpServer.Shutdown(shutdownCtx)
	if err != nil {
		logger.Fatalf("FATAL: cannot close HTTP OpenTSDB server: %s", err)
	}
}

View File

@@ -2,6 +2,7 @@ package vmselect
import (
"flag"
"fmt"
"net/http"
"runtime"
"strings"
@@ -30,29 +31,53 @@ func Init() {
fs.RemoveDirContents(tmpDirPath)
netstorage.InitTmpBlocksDir(tmpDirPath)
promql.InitRollupResultCache(*vmstorage.DataPath + "/cache/rollupResult")
concurrencyCh = make(chan struct{}, *maxConcurrentRequests)
}
var concurrencyCh chan struct{}
// Stop stops vmselect
func Stop() {
promql.StopRollupResultCache()
}
var concurrencyCh chan struct{}
var (
concurrencyLimitReached = metrics.NewCounter(`vm_concurrent_select_limit_reached_total`)
concurrencyLimitTimeout = metrics.NewCounter(`vm_concurrent_select_limit_timeout_total`)
_ = metrics.NewGauge(`vm_concurrent_select_capacity`, func() float64 {
return float64(cap(concurrencyCh))
})
_ = metrics.NewGauge(`vm_concurrent_select_current`, func() float64 {
return float64(len(concurrencyCh))
})
)
// RequestHandler handles remote read API requests for Prometheus
func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
// Limit the number of concurrent queries.
// Sleep for a while until giving up. This should resolve short bursts in requests.
t := timerpool.Get(*maxQueueDuration)
select {
case concurrencyCh <- struct{}{}:
timerpool.Put(t)
defer func() { <-concurrencyCh }()
case <-t.C:
timerpool.Put(t)
httpserver.Errorf(w, "cannot handle more than %d concurrent requests", cap(concurrencyCh))
return true
default:
// Sleep for a while until giving up. This should resolve short bursts in requests.
concurrencyLimitReached.Inc()
t := timerpool.Get(*maxQueueDuration)
select {
case concurrencyCh <- struct{}{}:
timerpool.Put(t)
defer func() { <-concurrencyCh }()
case <-t.C:
timerpool.Put(t)
concurrencyLimitTimeout.Inc()
err := &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("cannot handle more than %d concurrent requests", cap(concurrencyCh)),
StatusCode: http.StatusServiceUnavailable,
}
httpserver.Errorf(w, "%s", err)
return true
}
}
path := strings.Replace(r.URL.Path, "//", "/", -1)
@@ -165,7 +190,10 @@ func sendPrometheusError(w http.ResponseWriter, r *http.Request, err error) {
logger.Errorf("error in %q: %s", r.URL.Path, err)
w.Header().Set("Content-Type", "application/json")
statusCode := 422
statusCode := http.StatusUnprocessableEntity
if esc, ok := err.(*httpserver.ErrorWithStatusCode); ok {
statusCode = esc.StatusCode
}
w.WriteHeader(statusCode)
prometheus.WriteErrorResponse(w, statusCode, err)
}

View File

@@ -0,0 +1,15 @@
package netstorage
import (
"os"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"golang.org/x/sys/unix"
)
// mustFadviseRandomRead advises the OS that f is going to be read
// in random order and that its contents will be needed soon.
//
// It panics if the OS rejects any of the hints.
func mustFadviseRandomRead(f *os.File) {
	fd := int(f.Fd())

	// The advice argument of fadvise is an enumeration, not a set of bit flags.
	// On Linux FADV_RANDOM|FADV_WILLNEED equals FADV_WILLNEED (1|3 == 3),
	// so a combined call silently drops the RANDOM hint.
	// Issue the hints separately instead.
	if err := unix.Fadvise(fd, 0, 0, unix.FADV_RANDOM); err != nil {
		logger.Panicf("FATAL: error returned from unix.Fadvise(RANDOM): %s", err)
	}
	if err := unix.Fadvise(fd, 0, 0, unix.FADV_WILLNEED); err != nil {
		logger.Panicf("FATAL: error returned from unix.Fadvise(WILLNEED): %s", err)
	}
}

View File

@@ -19,9 +19,9 @@ import (
)
var (
maxTagKeysPerSearch = flag.Int("search.maxTagKeys", 10e3, "The maximum number of tag keys returned per search")
maxTagValuesPerSearch = flag.Int("search.maxTagValues", 10e3, "The maximum number of tag values returned per search")
maxMetricsPerSearch = flag.Int("search.maxUniqueTimeseries", 100e3, "The maximum number of unique time series each search can scan")
maxTagKeysPerSearch = flag.Int("search.maxTagKeys", 100e3, "The maximum number of tag keys returned per search")
maxTagValuesPerSearch = flag.Int("search.maxTagValues", 100e3, "The maximum number of tag values returned per search")
maxMetricsPerSearch = flag.Int("search.maxUniqueTimeseries", 300e3, "The maximum number of unique time series each search can scan")
)
// Result is a single timeseries result.

View File

@@ -1,7 +1,6 @@
package netstorage
import (
"bufio"
"fmt"
"io/ioutil"
"os"
@@ -10,6 +9,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metrics"
)
@@ -30,13 +30,23 @@ func InitTmpBlocksDir(tmpDirPath string) {
var tmpBlocksDir string
const maxInmemoryTmpBlocksFile = 512 * 1024
// maxInmemoryTmpBlocksFile returns the size limit in bytes for the in-memory
// part of a tmp blocks file.
//
// The limit is 1/1024 of memory.Allowed(), but not less than 64KiB.
func maxInmemoryTmpBlocksFile() int {
	const minSize = 64 * 1024
	size := memory.Allowed() / 1024
	if size < minSize {
		return minSize
	}
	return size
}
// Export the current value of maxInmemoryTmpBlocksFile as a gauge.
var _ = metrics.NewGauge(`vm_tmp_blocks_max_inmemory_file_size_bytes`, func() float64 {
	return float64(maxInmemoryTmpBlocksFile())
})
type tmpBlocksFile struct {
buf []byte
f *os.File
bw *bufio.Writer
f *os.File
offset uint64
}
@@ -44,7 +54,9 @@ type tmpBlocksFile struct {
func getTmpBlocksFile() *tmpBlocksFile {
v := tmpBlocksFilePool.Get()
if v == nil {
return &tmpBlocksFile{}
return &tmpBlocksFile{
buf: make([]byte, 0, maxInmemoryTmpBlocksFile()),
}
}
return v.(*tmpBlocksFile)
}
@@ -53,7 +65,6 @@ func putTmpBlocksFile(tbf *tmpBlocksFile) {
tbf.MustClose()
tbf.buf = tbf.buf[:0]
tbf.f = nil
tbf.bw = nil
tbf.offset = 0
tmpBlocksFilePool.Put(tbf)
}
@@ -69,22 +80,6 @@ func (addr tmpBlockAddr) String() string {
return fmt.Sprintf("offset %d, size %d", addr.offset, addr.size)
}
func getBufioWriter(f *os.File) *bufio.Writer {
v := bufioWriterPool.Get()
if v == nil {
return bufio.NewWriterSize(f, maxInmemoryTmpBlocksFile*2)
}
bw := v.(*bufio.Writer)
bw.Reset(f)
return bw
}
func putBufioWriter(bw *bufio.Writer) {
bufioWriterPool.Put(bw)
}
var bufioWriterPool sync.Pool
var tmpBlocksFilesCreated = metrics.NewCounter(`vm_tmp_blocks_files_created_total`)
// WriteBlock writes b to tbf.
@@ -92,28 +87,31 @@ var tmpBlocksFilesCreated = metrics.NewCounter(`vm_tmp_blocks_files_created_tota
// It returns errors since the operation may fail on space shortage
// and this must be handled.
func (tbf *tmpBlocksFile) WriteBlock(b *storage.Block) (tmpBlockAddr, error) {
bb := tmpBufPool.Get()
defer tmpBufPool.Put(bb)
bb.B = storage.MarshalBlock(bb.B[:0], b)
var addr tmpBlockAddr
addr.offset = tbf.offset
tbfBufLen := len(tbf.buf)
tbf.buf = storage.MarshalBlock(tbf.buf, b)
addr.size = len(tbf.buf) - tbfBufLen
addr.size = len(bb.B)
tbf.offset += uint64(addr.size)
if tbf.offset <= maxInmemoryTmpBlocksFile {
if len(tbf.buf)+len(bb.B) <= cap(tbf.buf) {
// Fast path - the data fits tbf.buf
tbf.buf = append(tbf.buf, bb.B...)
return addr, nil
}
// Slow path: flush the data from tbf.buf to file.
if tbf.f == nil {
f, err := ioutil.TempFile(tmpBlocksDir, "")
if err != nil {
return addr, err
}
tbf.f = f
tbf.bw = getBufioWriter(f)
tmpBlocksFilesCreated.Inc()
}
_, err := tbf.bw.Write(tbf.buf)
tbf.buf = tbf.buf[:0]
_, err := tbf.f.Write(tbf.buf)
tbf.buf = append(tbf.buf[:0], bb.B...)
if err != nil {
return addr, fmt.Errorf("cannot write block to %q: %s", tbf.f.Name(), err)
}
@@ -124,15 +122,15 @@ func (tbf *tmpBlocksFile) Finalize() error {
if tbf.f == nil {
return nil
}
err := tbf.bw.Flush()
putBufioWriter(tbf.bw)
tbf.bw = nil
if _, err := tbf.f.Write(tbf.buf); err != nil {
return fmt.Errorf("cannot flush the remaining %d bytes to tmpBlocksFile: %s", len(tbf.buf), err)
}
tbf.buf = tbf.buf[:0]
if _, err := tbf.f.Seek(0, 0); err != nil {
logger.Panicf("FATAL: cannot seek to the start of file: %s", err)
}
mustFadviseRandomRead(tbf.f)
return err
return nil
}
func (tbf *tmpBlocksFile) MustReadBlockAt(dst *storage.Block, addr tmpBlockAddr) {
@@ -167,10 +165,6 @@ func (tbf *tmpBlocksFile) MustClose() {
if tbf.f == nil {
return
}
if tbf.bw != nil {
putBufioWriter(tbf.bw)
tbf.bw = nil
}
fname := tbf.f.Name()
// Remove the file at first, then close it.

View File

@@ -30,7 +30,7 @@ func TestTmpBlocksFileSerial(t *testing.T) {
}
func TestTmpBlocksFileConcurrent(t *testing.T) {
concurrency := 4
concurrency := 3
ch := make(chan error, concurrency)
for i := 0; i < concurrency; i++ {
go func() {
@@ -69,7 +69,7 @@ func testTmpBlocksFile() error {
_, _, _ = b.MarshalData(0, 0)
return &b
}
for _, size := range []int{1024, 16 * 1024, maxInmemoryTmpBlocksFile / 2, 2 * maxInmemoryTmpBlocksFile} {
for _, size := range []int{1024, 16 * 1024, maxInmemoryTmpBlocksFile() / 2, 2 * maxInmemoryTmpBlocksFile()} {
err := func() error {
tbf := getTmpBlocksFile()
defer putTmpBlocksFile(tbf)
@@ -94,7 +94,7 @@ func testTmpBlocksFile() error {
}
// Read blocks in parallel and verify them
concurrency := 3
concurrency := 2
workCh := make(chan int)
doneCh := make(chan error)
for i := 0; i < concurrency; i++ {

View File

@@ -557,7 +557,9 @@ func QueryRangeHandler(w http.ResponseWriter, r *http.Request) error {
if err := promql.ValidateMaxPointsPerTimeseries(start, end, step); err != nil {
return err
}
start, end = promql.AdjustStartEnd(start, end, step)
if mayCache {
start, end = promql.AdjustStartEnd(start, end, step)
}
ec := promql.EvalConfig{
Start: start,
@@ -574,12 +576,47 @@ func QueryRangeHandler(w http.ResponseWriter, r *http.Request) error {
result = adjustLastPoints(result)
}
// Remove NaN values as Prometheus does.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/153
removeNaNValuesInplace(result)
w.Header().Set("Content-Type", "application/json")
WriteQueryRangeResponse(w, result)
queryRangeDuration.UpdateDuration(startTime)
return nil
}
// removeNaNValuesInplace drops the points with NaN values from every
// time series in tss.
//
// The Values and Timestamps slices are compacted in place and re-sliced
// to the new length; time series without NaNs are left untouched.
func removeNaNValuesInplace(tss []netstorage.Result) {
	for i := range tss {
		ts := &tss[i]
		values, timestamps := ts.Values, ts.Timestamps

		// Fast path: leave the time series alone if there is nothing to remove.
		nanFound := false
		for j := 0; j < len(values) && !nanFound; j++ {
			nanFound = math.IsNaN(values[j])
		}
		if !nanFound {
			continue
		}

		// Slow path: shift the surviving points towards the start.
		n := 0
		for j, v := range values {
			if math.IsNaN(v) {
				continue
			}
			values[n] = v
			timestamps[n] = timestamps[j]
			n++
		}
		ts.Values = values[:n]
		ts.Timestamps = timestamps[:n]
	}
}
var queryRangeDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/query_range"}`)
// adjustLastPoints substitutes the last point values with the previous

View File

@@ -2,11 +2,48 @@ package prometheus
import (
"fmt"
"math"
"net/http"
"net/url"
"reflect"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
)
// TestRemoveNaNValuesInplace verifies that NaN points are removed together
// with their timestamps and that time series without NaNs are left intact.
func TestRemoveNaNValuesInplace(t *testing.T) {
	f := func(tss []netstorage.Result, tssExpected []netstorage.Result) {
		t.Helper()
		removeNaNValuesInplace(tss)
		if !reflect.DeepEqual(tss, tssExpected) {
			t.Fatalf("unexpected result; got %v; want %v", tss, tssExpected)
		}
	}

	nan := math.NaN()

	// Empty input
	f(nil, nil)

	// The first time series has no NaNs; the second one has them
	// at the start and at the end.
	f([]netstorage.Result{
		{
			Timestamps: []int64{100, 200, 300},
			Values:     []float64{1, 2, 3},
		},
		{
			Timestamps: []int64{100, 200, 300, 400},
			Values:     []float64{nan, nan, 3, nan},
		},
	}, []netstorage.Result{
		{
			Timestamps: []int64{100, 200, 300},
			Values:     []float64{1, 2, 3},
		},
		{
			Timestamps: []int64{300},
			Values:     []float64{3},
		},
	})
}
func TestGetTimeSuccess(t *testing.T) {
f := func(s string, timestampExpected int64) {
t.Helper()

View File

@@ -353,6 +353,25 @@ func aggrFuncCountValues(afa *aggrFuncArg) ([]*timeseries, error) {
if err != nil {
return nil, err
}
// Remove dstLabel from grouping like Prometheus does.
modifier := &afa.ae.Modifier
switch strings.ToLower(modifier.Op) {
case "without":
modifier.Args = append(modifier.Args, dstLabel)
case "by":
dstArgs := modifier.Args[:0]
for _, arg := range modifier.Args {
if arg == dstLabel {
continue
}
dstArgs = append(dstArgs, arg)
}
modifier.Args = dstArgs
default:
// Do nothing
}
afe := func(tss []*timeseries) []*timeseries {
m := make(map[float64]bool)
for _, ts := range tss {

View File

@@ -179,7 +179,8 @@ func compareValues(vs1, vs2 []float64) error {
}
continue
}
if v1 != v2 {
eps := math.Abs(v1 - v2)
if eps > 1e-14 {
return fmt.Errorf("unexpected value; got %v; want %v", v1, v2)
}
}

View File

@@ -322,6 +322,7 @@ func adjustBinaryOpTags(be *binaryOpExpr, left, right []*timeseries) ([]*timeser
}
src := tssRight[0]
for _, ts := range tssLeft {
resetMetricGroupIfRequired(be, ts)
ts.MetricName.AddMissingTags(joinTags, &src.MetricName)
rvsLeft = append(rvsLeft, ts)
rvsRight = append(rvsRight, src)
@@ -332,6 +333,7 @@ func adjustBinaryOpTags(be *binaryOpExpr, left, right []*timeseries) ([]*timeser
}
src := tssLeft[0]
for _, ts := range tssRight {
resetMetricGroupIfRequired(be, ts)
ts.MetricName.AddMissingTags(joinTags, &src.MetricName)
rvsLeft = append(rvsLeft, src)
rvsRight = append(rvsRight, ts)
@@ -416,10 +418,25 @@ func binaryOpIfnot(left, right float64) float64 {
}
// binaryOpEq reports whether left equals right.
//
// Unlike the plain `==`, two NaNs are considered equal.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/150 .
func binaryOpEq(left, right float64) bool {
	if !math.IsNaN(left) {
		// A NaN on the right never equals a non-NaN on the left.
		return left == right
	}
	return math.IsNaN(right)
}
// binaryOpNeq reports whether left differs from right.
//
// NaN operands get special handling: two NaNs are considered equal,
// while a NaN compared with any non-NaN value is considered different.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/150 .
func binaryOpNeq(left, right float64) bool {
	leftIsNaN := math.IsNaN(left)
	rightIsNaN := math.IsNaN(right)
	if leftIsNaN || rightIsNaN {
		// The operands differ only when exactly one of them is NaN.
		return leftIsNaN != rightIsNaN
	}
	return left != right
}

View File

@@ -105,14 +105,14 @@ func maySortResults(e expr, tss []*timeseries) bool {
func timeseriesToResult(tss []*timeseries, maySort bool) ([]netstorage.Result, error) {
tss = removeNaNs(tss)
result := make([]netstorage.Result, len(tss))
m := make(map[string]bool)
m := make(map[string]struct{}, len(tss))
bb := bbPool.Get()
for i, ts := range tss {
bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
if m[string(bb.B)] {
if _, ok := m[string(bb.B)]; ok {
return nil, fmt.Errorf(`duplicate output timeseries: %s%s`, ts.MetricName.MetricGroup, stringMetricName(&ts.MetricName))
}
m[string(bb.B)] = true
m[string(bb.B)] = struct{}{}
rs := &result[i]
rs.MetricNameMarshaled = append(rs.MetricNameMarshaled[:0], bb.B...)

View File

@@ -1302,6 +1302,44 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`label_value()`, func(t *testing.T) {
t.Parallel()
q := `with (
x = (
label_set(time(), "foo", "123.456", "__name__", "aaa"),
label_set(-time(), "foo", "bar", "__name__", "bbb"),
label_set(-time(), "__name__", "bxs"),
label_set(-time(), "foo", "45", "bar", "xs"),
)
)
sort(x + label_value(x, "foo"))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{-955, -1155, -1355, -1555, -1755, -1955},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{
{
Key: []byte("bar"),
Value: []byte("xs"),
},
{
Key: []byte("foo"),
Value: []byte("45"),
},
}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1123.456, 1323.456, 1523.456, 1723.456, 1923.456, 2123.456},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{{
Key: []byte("foo"),
Value: []byte("123.456"),
}}
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`label_transform(mismatch)`, func(t *testing.T) {
t.Parallel()
q := `label_transform(time(), "__name__", "foobar", "xx")`
@@ -1821,9 +1859,9 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`vector * on(foo) group_left() duplicate_timeseries`, func(t *testing.T) {
t.Run(`vector * on(foo) group_left() duplicate_nonoverlapping_timeseries`, func(t *testing.T) {
t.Parallel()
q := `label_set(time()/10, "foo", "bar") + on(foo) group_left() (
q := `label_set(time()/10, "foo", "bar", "xx", "yy", "__name__", "qwert") + on(foo) group_left() (
label_set(time() < 1400, "foo", "bar", "op", "le"),
label_set(time() >= 1400, "foo", "bar", "op", "ge"),
)`
@@ -1832,13 +1870,85 @@ func TestExecSuccess(t *testing.T) {
Values: []float64{1100, 1320, 1540, 1760, 1980, 2200},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
Key: []byte("foo"),
Value: []byte("bar"),
}}
r1.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("bar"),
},
{
Key: []byte("xx"),
Value: []byte("yy"),
},
}
resultExpected := []netstorage.Result{r1}
f(q, resultExpected)
})
t.Run(`vector * on(foo) group_left(__name__)`, func(t *testing.T) {
t.Parallel()
q := `label_set(time()/10, "foo", "bar", "xx", "yy", "__name__", "qwert") + on(foo) group_left(__name__)
label_set(time(), "foo", "bar", "__name__", "aaa")`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1100, 1320, 1540, 1760, 1980, 2200},
Timestamps: timestampsExpected,
}
r1.MetricName.MetricGroup = []byte("aaa")
r1.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("bar"),
},
{
Key: []byte("xx"),
Value: []byte("yy"),
},
}
resultExpected := []netstorage.Result{r1}
f(q, resultExpected)
})
t.Run(`vector * on(foo) group_right()`, func(t *testing.T) {
t.Parallel()
q := `sort(label_set(time()/10, "foo", "bar", "xx", "yy", "__name__", "qwert") + on(foo) group_right(xx) (
label_set(time(), "foo", "bar", "__name__", "aaa"),
label_set(time()+3, "foo", "bar", "__name__", "yyy","ppp", "123"),
))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1100, 1320, 1540, 1760, 1980, 2200},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("bar"),
},
{
Key: []byte("xx"),
Value: []byte("yy"),
},
}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1103, 1323, 1543, 1763, 1983, 2203},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("bar"),
},
{
Key: []byte("ppp"),
Value: []byte("123"),
},
{
Key: []byte("xx"),
Value: []byte("yy"),
},
}
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`vector * on() group_left scalar`, func(t *testing.T) {
t.Parallel()
q := `sort_desc((label_set(time(), "foo", "bar") or label_set(10, "foo", "qwert")) * on() group_left 2)`
@@ -2160,21 +2270,78 @@ func TestExecSuccess(t *testing.T) {
})
t.Run(`histogram_quantile(negative-bucket-count)`, func(t *testing.T) {
t.Parallel()
q := `sort(histogram_quantile(0.6,
q := `histogram_quantile(0.6,
label_set(90, "foo", "bar", "le", "10")
or label_set(-100, "foo", "bar", "le", "30")
or label_set(300, "foo", "bar", "le", "+Inf")
))`
)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{30, 30, 30, 30, 30, 30},
Timestamps: timestampsExpected,
}
r.MetricName.Tags = []storage.Tag{{
Key: []byte("foo"),
Value: []byte("bar"),
}}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`histogram_quantile(nan-bucket-count)`, func(t *testing.T) {
t.Parallel()
q := `histogram_quantile(0.6,
label_set(90, "foo", "bar", "le", "10")
or label_set(NaN, "foo", "bar", "le", "30")
or label_set(300, "foo", "bar", "le", "+Inf")
)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{30, 30, 30, 30, 30, 30},
Timestamps: timestampsExpected,
}
r.MetricName.Tags = []storage.Tag{{
Key: []byte("foo"),
Value: []byte("bar"),
}}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
// Regular (non-broken) buckets: no NaN or negative counts are involved here,
// so the subtest gets its own name instead of reusing `nan-bucket-count`,
// which is already taken by neighbouring subtests.
//
// Expected value: vReq = 300*0.2 = 60 falls into the (10, 30] bucket,
// so the result is 10 + (30-10)*(60-0)/(100-0) = 22.
t.Run(`histogram_quantile(normal-bucket-count)`, func(t *testing.T) {
	t.Parallel()
	q := `histogram_quantile(0.2,
label_set(0, "foo", "bar", "le", "10")
or label_set(100, "foo", "bar", "le", "30")
or label_set(300, "foo", "bar", "le", "+Inf")
)`
	r := netstorage.Result{
		MetricName: metricNameExpected,
		Values:     []float64{22, 22, 22, 22, 22, 22},
		Timestamps: timestampsExpected,
	}
	r.MetricName.Tags = []storage.Tag{{
		Key:   []byte("foo"),
		Value: []byte("bar"),
	}}
	resultExpected := []netstorage.Result{r}
	f(q, resultExpected)
})
t.Run(`histogram_quantile(zero-bucket-count)`, func(t *testing.T) {
t.Parallel()
q := `histogram_quantile(0.6,
label_set(0, "foo", "bar", "le", "10")
or label_set(0, "foo", "bar", "le", "30")
or label_set(0, "foo", "bar", "le", "+Inf")
)`
resultExpected := []netstorage.Result{}
f(q, resultExpected)
})
t.Run(`histogram_quantile(nan-bucket-count)`, func(t *testing.T) {
t.Parallel()
q := `sort(histogram_quantile(0.6,
label_set(90, "foo", "bar", "le", "10")
or label_set(NaN, "foo", "bar", "le", "30")
or label_set(300, "foo", "bar", "le", "+Inf")
))`
q := `histogram_quantile(0.6,
label_set(nan, "foo", "bar", "le", "10")
or label_set(nan, "foo", "bar", "le", "30")
or label_set(nan, "foo", "bar", "le", "+Inf")
)`
resultExpected := []netstorage.Result{}
f(q, resultExpected)
})
@@ -3723,6 +3890,107 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1, r2, r3, r4, r5, r6}
f(q, resultExpected)
})
t.Run(`count_values by (xxx)`, func(t *testing.T) {
t.Parallel()
q := `count_values("xxx", label_set(10, "foo", "bar", "xxx", "aaa") or label_set(floor(time()/600), "foo", "bar", "baz", "xx")) by (xxx)`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, nan, nan, nan, nan, nan},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{
{
Key: []byte("xxx"),
Value: []byte("1"),
},
}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{nan, 1, 1, 1, nan, nan},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{
{
Key: []byte("xxx"),
Value: []byte("2"),
},
}
r3 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{nan, nan, nan, nan, 1, 1},
Timestamps: timestampsExpected,
}
r3.MetricName.Tags = []storage.Tag{
{
Key: []byte("xxx"),
Value: []byte("3"),
},
}
r4 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1},
Timestamps: timestampsExpected,
}
r4.MetricName.Tags = []storage.Tag{
{
Key: []byte("xxx"),
Value: []byte("10"),
},
}
resultExpected := []netstorage.Result{r1, r2, r3, r4}
f(q, resultExpected)
})
t.Run(`count_values without (baz)`, func(t *testing.T) {
t.Parallel()
q := `count_values("xxx", label_set(floor(time()/600), "foo", "bar")) without (baz)`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, nan, nan, nan, nan, nan},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("bar"),
},
{
Key: []byte("xxx"),
Value: []byte("1"),
},
}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{nan, 1, 1, 1, nan, nan},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("bar"),
},
{
Key: []byte("xxx"),
Value: []byte("2"),
},
}
r3 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{nan, nan, nan, nan, 1, 1},
Timestamps: timestampsExpected,
}
r3.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("bar"),
},
{
Key: []byte("xxx"),
Value: []byte("3"),
},
}
resultExpected := []netstorage.Result{r1, r2, r3}
f(q, resultExpected)
})
}
func TestExecError(t *testing.T) {

View File

@@ -149,12 +149,6 @@ func scanString(s string) (string, error) {
}
func scanPositiveNumber(s string) (string, error) {
if strings.HasPrefix(s, "Inf") {
return "Inf", nil
}
if strings.HasPrefix(s, "NaN") {
return "NaN", nil
}
// Scan integer part. It may be empty if fractional part exists.
i := 0
for i < len(s) && isDecimalChar(s[i]) {
@@ -333,6 +327,14 @@ func scanTagFilterOpPrefix(s string) int {
return -1
}
// isInfOrNaN reports whether s is the `inf` or `nan` token
// spelled in any letter case (e.g. `Inf`, `NAN`, `nAn`).
func isInfOrNaN(s string) bool {
	// Both tokens are exactly three bytes long, so anything else cannot match.
	if len(s) != 3 {
		return false
	}
	return strings.EqualFold(s, "inf") || strings.EqualFold(s, "nan")
}
func isOffset(s string) bool {
s = strings.ToLower(s)
return s == "offset"
@@ -361,7 +363,7 @@ func isPositiveNumberPrefix(s string) bool {
// Check for .234 numbers
if s[0] != '.' || len(s) < 2 {
return strings.HasPrefix(s, "Inf") || strings.HasPrefix(s, "NaN")
return false
}
return isDecimalChar(s[1])
}

View File

@@ -373,7 +373,7 @@ func (p *parser) parseSingleExpr() (expr, error) {
}
func (p *parser) parseSingleExprWithoutRollupSuffix() (expr, error) {
if isPositiveNumberPrefix(p.lex.Token) {
if isPositiveNumberPrefix(p.lex.Token) || isInfOrNaN(p.lex.Token) {
return p.parsePositiveNumberExpr()
}
if isStringPrefix(p.lex.Token) {
@@ -417,7 +417,7 @@ func (p *parser) parseSingleExprWithoutRollupSuffix() (expr, error) {
}
func (p *parser) parsePositiveNumberExpr() (*numberExpr, error) {
if !isPositiveNumberPrefix(p.lex.Token) {
if !isPositiveNumberPrefix(p.lex.Token) && !isInfOrNaN(p.lex.Token) {
return nil, fmt.Errorf(`positiveNumberExpr: unexpected token %q; want "number"`, p.lex.Token)
}

View File

@@ -170,14 +170,34 @@ func TestParsePromQLSuccess(t *testing.T) {
another(`-.2`, `-0.2`)
another(`-.2E-2`, `-0.002`)
same(`NaN`)
another(`nan`, `NaN`)
another(`NAN`, `NaN`)
another(`nAN`, `NaN`)
another(`Inf`, `+Inf`)
another(`INF`, `+Inf`)
another(`inf`, `+Inf`)
another(`+Inf`, `+Inf`)
another(`-Inf`, `-Inf`)
another(`-inF`, `-Inf`)
// binaryOpExpr
another(`NaN + 2 *3 * Inf`, `NaN`)
another(`Inf - Inf`, `NaN`)
another(`Inf + Inf`, `+Inf`)
another(`nan == nan`, `NaN`)
another(`nan ==bool nan`, `1`)
another(`nan !=bool nan`, `0`)
another(`nan !=bool 2`, `1`)
another(`2 !=bool nan`, `1`)
another(`nan >bool nan`, `0`)
another(`nan <bool nan`, `0`)
another(`1 ==bool nan`, `0`)
another(`NaN !=bool 1`, `1`)
another(`inf >=bool 2`, `1`)
another(`-1 >bool -inf`, `1`)
another(`-1 <bool -inf`, `0`)
another(`nan + 2 *3 * inf`, `NaN`)
another(`INF - Inf`, `NaN`)
another(`Inf + inf`, `+Inf`)
another(`1/0`, `+Inf`)
another(`0/0`, `NaN`)
another(`-m`, `0 - m`)
same(`m + ignoring () n[5m]`)
another(`M + IGNORING () N[5m]`, `M + ignoring () N[5m]`)

View File

@@ -45,6 +45,8 @@ var rollupFuncs = map[string]newRollupFunc{
"distinct_over_time": newRollupFuncOneArg(rollupDistinct),
"integrate": newRollupFuncOneArg(rollupIntegrate),
"ideriv": newRollupFuncOneArg(rollupIderiv),
"lifetime": newRollupFuncOneArg(rollupLifetime),
"scrape_interval": newRollupFuncOneArg(rollupScrapeInterval),
"rollup": newRollupFuncOneArg(rollupFake),
"rollup_rate": newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
"rollup_deriv": newRollupFuncOneArg(rollupFake),
@@ -61,6 +63,8 @@ var rollupFuncsMayAdjustWindow = map[string]bool{
"deriv_fast": true,
"irate": true,
"rate": true,
"lifetime": true,
"scrape_interval": true,
}
var rollupFuncsRemoveCounterResets = map[string]bool{
@@ -193,23 +197,21 @@ func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []i
i := 0
j := 0
ni := 0
nj := 0
for _, tEnd := range rc.Timestamps {
tStart := tEnd - window
n := sort.Search(len(timestamps)-i, func(n int) bool {
return timestamps[i+n] > tStart
})
i += n
ni = seekFirstTimestampIdxAfter(timestamps[i:], tStart, ni)
i += ni
if j < i {
j = i
}
n = sort.Search(len(timestamps)-j, func(n int) bool {
return timestamps[j+n] > tEnd
})
j += n
nj = seekFirstTimestampIdxAfter(timestamps[j:], tEnd, nj)
j += nj
rfa.prevValue = nan
rfa.prevTimestamp = tStart - maxPrevInterval
if i > 0 && timestamps[i-1] > rfa.prevTimestamp {
if i < len(timestamps) && i > 0 && timestamps[i-1] > rfa.prevTimestamp {
rfa.prevValue = values[i-1]
rfa.prevTimestamp = timestamps[i-1]
}
@@ -225,6 +227,46 @@ func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []i
return dstValues
}
// seekFirstTimestampIdxAfter returns the index of the first item in timestamps
// exceeding seekTimestamp. len(timestamps) is returned if there is no such item.
//
// nHint is the expected result. The search is narrowed to a small window
// around nHint whenever the window boundaries prove that the answer lies there.
//
// NOTE(review): the narrowing and the binary search assume that timestamps
// are sorted in ascending order - confirm against callers.
func seekFirstTimestampIdxAfter(timestamps []int64, seekTimestamp int64, nHint int) int {
	total := len(timestamps)
	if total == 0 || timestamps[0] > seekTimestamp {
		return 0
	}

	// Clamp the [lo, hi) window around nHint to the valid index range.
	lo := nHint - 2
	switch {
	case lo < 0:
		lo = 0
	case lo >= total:
		lo = total - 1
	}
	hi := nHint + 2
	if hi > total {
		hi = total
	}

	// Skip the head if the item at the window start doesn't exceed seekTimestamp.
	offset := 0
	if lo > 0 && timestamps[lo] <= seekTimestamp {
		offset = lo
		timestamps = timestamps[lo:]
		hi -= lo
	}
	// Drop the tail if the item at the window end already exceeds seekTimestamp.
	if hi < len(timestamps) && timestamps[hi] > seekTimestamp {
		timestamps = timestamps[:hi]
	}

	if len(timestamps) >= 16 {
		// Slow path: too many timestamps remain, so use binary search.
		pos := sort.Search(len(timestamps), func(k int) bool {
			return k >= 0 && k < len(timestamps) && timestamps[k] > seekTimestamp
		})
		return offset + pos
	}

	// Fast path: only a few timestamps remain, so scan them all.
	for pos := range timestamps {
		if timestamps[pos] > seekTimestamp {
			return offset + pos
		}
	}
	return offset + len(timestamps)
}
func getMaxPrevInterval(timestamps []int64) int64 {
if len(timestamps) < 2 {
return int64(maxSilenceInterval)
@@ -615,10 +657,15 @@ func rollupDelta(rfa *rollupFuncArg) float64 {
if len(values) == 0 {
return nan
}
if len(values) == 1 {
// Assume that the previous non-existing value was 0.
return values[0]
}
prevValue = values[0]
values = values[1:]
}
if len(values) == 0 {
// Assume that the value didn't change on the given interval.
return 0
}
return values[len(values)-1] - prevValue
@@ -632,6 +679,7 @@ func rollupIdelta(rfa *rollupFuncArg) float64 {
if math.IsNaN(rfa.prevValue) {
return nan
}
// Assume that the value didn't change on the given interval.
return 0
}
lastValue := values[len(values)-1]
@@ -639,7 +687,8 @@ func rollupIdelta(rfa *rollupFuncArg) float64 {
if len(values) == 0 {
prevValue := rfa.prevValue
if math.IsNaN(prevValue) {
return 0
// Assume that the previous non-existing value was 0.
return lastValue
}
return lastValue - prevValue
}
@@ -661,7 +710,8 @@ func rollupDerivFast(rfa *rollupFuncArg) float64 {
prevValue := rfa.prevValue
prevTimestamp := rfa.prevTimestamp
if math.IsNaN(prevValue) {
if len(values) == 0 {
if len(values) < 2 {
// It is impossible to calculate derivative on 0 or 1 values.
return nan
}
prevValue = values[0]
@@ -670,6 +720,7 @@ func rollupDerivFast(rfa *rollupFuncArg) float64 {
timestamps = timestamps[1:]
}
if len(values) == 0 {
// Assume that the value didn't change on the given interval.
return 0
}
vEnd := values[len(values)-1]
@@ -684,11 +735,12 @@ func rollupIderiv(rfa *rollupFuncArg) float64 {
// before calling rollup funcs.
values := rfa.values
timestamps := rfa.timestamps
if len(values) == 0 {
if math.IsNaN(rfa.prevValue) {
if len(values) < 2 {
if len(values) == 0 || math.IsNaN(rfa.prevValue) {
// It is impossible to calculate derivative on 0 or 1 values.
return nan
}
return 0
return (values[0] - rfa.prevValue) / (float64(timestamps[0]-rfa.prevTimestamp) * 1e-3)
}
vEnd := values[len(values)-1]
tEnd := timestamps[len(timestamps)-1]
@@ -712,7 +764,37 @@ func rollupIderiv(rfa *rollupFuncArg) float64 {
}
dv := vEnd - vStart
dt := tEnd - tStart
return dv / (float64(dt) / 1000)
return dv / (float64(dt) * 1e-3)
}
// rollupLifetime returns the duration between the first and the last data points
// available to the rollup, multiplied by 1e-3 (presumably converting
// milliseconds to seconds - confirm the timestamp units).
//
// If the previous value is known (rfa.prevValue isn't NaN), rfa.prevTimestamp
// serves as the first data point. Otherwise the first item of rfa.timestamps is used.
// nan is returned when there are not enough points to form an interval.
func rollupLifetime(rfa *rollupFuncArg) float64 {
	tss := rfa.timestamps
	n := len(tss)
	hasPrev := !math.IsNaN(rfa.prevValue)
	switch {
	case hasPrev && n > 0:
		return float64(tss[n-1]-rfa.prevTimestamp) * 1e-3
	case !hasPrev && n > 1:
		return float64(tss[n-1]-tss[0]) * 1e-3
	default:
		// Fewer than two points in total, so there is no interval to measure.
		return nan
	}
}
// rollupScrapeInterval returns the average interval between adjacent data points
// available to the rollup, multiplied by 1e-3 (presumably converting
// milliseconds to seconds - confirm the timestamp units).
//
// If the previous value is known (rfa.prevValue isn't NaN), the point
// at rfa.prevTimestamp takes part in the calculation, giving len(timestamps) intervals.
// Otherwise only rfa.timestamps are used, giving len(timestamps)-1 intervals.
// nan is returned when there are not enough points to form an interval.
func rollupScrapeInterval(rfa *rollupFuncArg) float64 {
	tss := rfa.timestamps
	n := len(tss)
	if !math.IsNaN(rfa.prevValue) {
		if n == 0 {
			return nan
		}
		// The previous point plus n points in the window form n intervals.
		return (float64(tss[n-1]-rfa.prevTimestamp) * 1e-3) / float64(n)
	}
	if n < 2 {
		return nan
	}
	// n points in the window form n-1 intervals.
	return float64(tss[n-1]-tss[0]) * 1e-3 / float64(n-1)
}
func rollupChanges(rfa *rollupFuncArg) float64 {

View File

@@ -4,14 +4,15 @@ import (
"crypto/rand"
"flag"
"fmt"
"runtime"
"sync"
"sync/atomic"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
"github.com/VictoriaMetrics/fastcache"
"github.com/VictoriaMetrics/metrics"
)
@@ -19,7 +20,7 @@ import (
var disableCache = flag.Bool("search.disableCache", false, "Whether to disable response caching. This may be useful during data backfilling")
var rollupResultCacheV = &rollupResultCache{
fastcache.New(1024 * 1024), // This is a cache for testing.
c: workingsetcache.New(1024*1024, time.Hour), // This is a cache for testing.
}
var rollupResultCachePath string
@@ -43,12 +44,13 @@ var (
func InitRollupResultCache(cachePath string) {
rollupResultCachePath = cachePath
startTime := time.Now()
var c *fastcache.Cache
cacheSize := getRollupResultCacheSize()
var c *workingsetcache.Cache
if len(rollupResultCachePath) > 0 {
logger.Infof("loading rollupResult cache from %q...", rollupResultCachePath)
c = fastcache.LoadFromFileOrNew(rollupResultCachePath, getRollupResultCacheSize())
c = workingsetcache.Load(rollupResultCachePath, cacheSize, time.Hour)
} else {
c = fastcache.New(getRollupResultCacheSize())
c = workingsetcache.New(cacheSize, time.Hour)
}
if *disableCache {
c.Reset()
@@ -96,25 +98,26 @@ func InitRollupResultCache(cachePath string) {
// StopRollupResultCache closes the rollupResult cache.
func StopRollupResultCache() {
if len(rollupResultCachePath) == 0 {
rollupResultCacheV.c.Reset()
rollupResultCacheV.c.Stop()
rollupResultCacheV.c = nil
return
}
gomaxprocs := runtime.GOMAXPROCS(-1)
logger.Infof("saving rollupResult cache to %q...", rollupResultCachePath)
startTime := time.Now()
if err := rollupResultCacheV.c.SaveToFileConcurrent(rollupResultCachePath, gomaxprocs); err != nil {
if err := rollupResultCacheV.c.Save(rollupResultCachePath); err != nil {
logger.Errorf("cannot close rollupResult cache at %q: %s", rollupResultCachePath, err)
} else {
var fcs fastcache.Stats
rollupResultCacheV.c.UpdateStats(&fcs)
rollupResultCacheV.c.Reset()
logger.Infof("saved rollupResult cache to %q in %s; entriesCount: %d, sizeBytes: %d",
rollupResultCachePath, time.Since(startTime), fcs.EntriesCount, fcs.BytesSize)
return
}
var fcs fastcache.Stats
rollupResultCacheV.c.UpdateStats(&fcs)
rollupResultCacheV.c.Stop()
rollupResultCacheV.c = nil
logger.Infof("saved rollupResult cache to %q in %s; entriesCount: %d, sizeBytes: %d",
rollupResultCachePath, time.Since(startTime), fcs.EntriesCount, fcs.BytesSize)
}
type rollupResultCache struct {
c *fastcache.Cache
c *workingsetcache.Cache
}
var rollupResultCacheResets = metrics.NewCounter(`vm_cache_resets_total{type="promql/rollupResult"}`)
@@ -148,15 +151,23 @@ func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricExp
return nil, ec.Start
}
bb.B = key.Marshal(bb.B[:0])
resultBuf := rrc.c.GetBig(nil, bb.B)
if len(resultBuf) == 0 {
compressedResultBuf := resultBufPool.Get()
defer resultBufPool.Put(compressedResultBuf)
compressedResultBuf.B = rrc.c.GetBig(compressedResultBuf.B[:0], bb.B)
if len(compressedResultBuf.B) == 0 {
mi.RemoveKey(key)
metainfoBuf = mi.Marshal(metainfoBuf[:0])
bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, me, iafc, window, ec.Step)
rrc.c.Set(bb.B, metainfoBuf)
return nil, ec.Start
}
tss, err := unmarshalTimeseriesFast(resultBuf)
// Decompress into newly allocated byte slice, since tss returned from unmarshalTimeseriesFast
// refers to the byte slice, so it cannot be returned to the resultBufPool.
resultBuf, err := encoding.DecompressZSTD(nil, compressedResultBuf.B)
if err != nil {
logger.Panicf("BUG: cannot decompress resultBuf from rollupResultCache: %s; it looks like it was improperly saved", err)
}
tss, err = unmarshalTimeseriesFast(resultBuf)
if err != nil {
logger.Panicf("BUG: cannot unmarshal timeseries from rollupResultCache: %s; it looks like it was improperly saved", err)
}
@@ -196,6 +207,8 @@ func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricExp
return tss, newStart
}
var resultBufPool bytesutil.ByteBufferPool
func (rrc *rollupResultCache) Put(funcName string, ec *EvalConfig, me *metricExpr, iafc *incrementalAggrFuncContext, window int64, tss []*timeseries) {
if *disableCache || len(tss) == 0 || !ec.mayCache() {
return
@@ -227,11 +240,16 @@ func (rrc *rollupResultCache) Put(funcName string, ec *EvalConfig, me *metricExp
// Store tss in the cache.
maxMarshaledSize := getRollupResultCacheSize() / 4
tssMarshaled := marshalTimeseriesFast(tss, maxMarshaledSize, ec.Step)
if tssMarshaled == nil {
resultBuf := resultBufPool.Get()
defer resultBufPool.Put(resultBuf)
resultBuf.B = marshalTimeseriesFast(resultBuf.B[:0], tss, maxMarshaledSize, ec.Step)
if len(resultBuf.B) == 0 {
tooBigRollupResults.Inc()
return
}
compressedResultBuf := resultBufPool.Get()
defer resultBufPool.Put(compressedResultBuf)
compressedResultBuf.B = encoding.CompressZSTDLevel(compressedResultBuf.B[:0], resultBuf.B, 1)
bb := bbPool.Get()
defer bbPool.Put(bb)
@@ -240,7 +258,7 @@ func (rrc *rollupResultCache) Put(funcName string, ec *EvalConfig, me *metricExp
key.prefix = rollupResultCacheKeyPrefix
key.suffix = atomic.AddUint64(&rollupResultCacheKeySuffix, 1)
bb.B = key.Marshal(bb.B[:0])
rrc.c.SetBig(bb.B, tssMarshaled)
rrc.c.SetBig(bb.B, compressedResultBuf.B)
bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, me, iafc, window, ec.Step)
metainfoBuf := rrc.c.Get(nil, bb.B)
@@ -270,7 +288,7 @@ var (
var tooBigRollupResults = metrics.NewCounter("vm_too_big_rollup_results_total")
// Increment this value every time the format of the cache changes.
const rollupResultCacheVersion = 5
const rollupResultCacheVersion = 6
func marshalRollupResultCacheKey(dst []byte, funcName string, me *metricExpr, iafc *incrementalAggrFuncContext, window, step int64) []byte {
dst = append(dst, rollupResultCacheVersion)

View File

@@ -45,8 +45,19 @@ func TestRollupIderivDuplicateTimestamps(t *testing.T) {
timestamps: []int64{100},
}
n = rollupIderiv(rfa)
if n != 0 {
t.Fatalf("unexpected value; got %v; want %v", n, 0)
if !math.IsNaN(n) {
t.Fatalf("unexpected value; got %v; want %v", n, nan)
}
rfa = &rollupFuncArg{
	prevTimestamp: 90,
	prevValue:     10,
	values:        []float64{15},
	timestamps:    []int64{100},
}
n = rollupIderiv(rfa)
// A single point with a known previous point:
// (15-10) / ((100-90) * 1e-3) = 500.
// The failure message must report the same expected value as the check.
if n != 500 {
	t.Fatalf("unexpected value; got %v; want %v", n, 500)
}
rfa = &rollupFuncArg{
@@ -171,7 +182,8 @@ func testRollupFunc(t *testing.T, funcName string, args []interface{}, meExpecte
t.Fatalf("unexpected value; got %v; want %v", v, vExpected)
}
} else {
if v != vExpected {
eps := math.Abs(v - vExpected)
if eps > 1e-14 {
t.Fatalf("unexpected value; got %v; want %v", v, vExpected)
}
}
@@ -347,7 +359,7 @@ func TestRollupNoWindowNoPoints(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{2, 0, 0, 0, 0, 0, 0, 0}
valuesExpected := []float64{2, 0, 0, 0, nan, nan, nan, nan}
timestampsExpected := []int64{120, 124, 128, 132, 136, 140, 144, 148}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -378,7 +390,7 @@ func TestRollupWindowNoPoints(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{34, 34, 34, nan}
valuesExpected := []float64{nan, nan, nan, nan}
timestampsExpected := []int64{161, 171, 181, 191}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -409,7 +421,7 @@ func TestRollupNoWindowPartialPoints(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{12, 44, 34, 34}
valuesExpected := []float64{12, 44, 34, nan}
timestampsExpected := []int64{100, 120, 140, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -454,7 +466,7 @@ func TestRollupWindowPartialPoints(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{44, 34, 34, 34}
valuesExpected := []float64{44, 34, 34, nan}
timestampsExpected := []int64{100, 120, 140, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -468,7 +480,7 @@ func TestRollupWindowPartialPoints(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 54, 44, 34}
valuesExpected := []float64{nan, 54, 44, nan}
timestampsExpected := []int64{0, 50, 100, 150}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -569,10 +581,66 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{0, 33, -87, 0}
valuesExpected := []float64{123, 33, -87, 0}
timestampsExpected := []int64{10, 50, 90, 130}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("lifetime_1", func(t *testing.T) {
rc := rollupConfig{
Func: rollupLifetime,
Start: 0,
End: 160,
Step: 40,
Window: 0,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 0.031, 0.044, 0.04, 0.01}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("lifetime_2", func(t *testing.T) {
rc := rollupConfig{
Func: rollupLifetime,
Start: 0,
End: 160,
Step: 40,
Window: 200,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 0.031, 0.075, 0.115, 0.125}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("scrape_interval_1", func(t *testing.T) {
rc := rollupConfig{
Func: rollupScrapeInterval,
Start: 0,
End: 160,
Step: 40,
Window: 0,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 0.010333333333333333, 0.011, 0.013333333333333334, 0.01}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("scrape_interval_2", func(t *testing.T) {
rc := rollupConfig{
Func: rollupScrapeInterval,
Start: 0,
End: 160,
Step: 40,
Window: 80,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 0.010333333333333333, 0.010714285714285714, 0.012, 0.0125}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("changes", func(t *testing.T) {
rc := rollupConfig{
Func: rollupChanges,
@@ -685,7 +753,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("distinct", func(t *testing.T) {
t.Run("distinct_over_time_1", func(t *testing.T) {
rc := rollupConfig{
Func: rollupDistinct,
Start: 0,
@@ -699,6 +767,20 @@ func TestRollupFuncsNoWindow(t *testing.T) {
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("distinct_over_time_2", func(t *testing.T) {
rc := rollupConfig{
Func: rollupDistinct,
Start: 0,
End: 160,
Step: 40,
Window: 80,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 4, 7, 6, 3}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
}
func testRowsEqual(t *testing.T, values []float64, timestamps []int64, valuesExpected []float64, timestampsExpected []int64) {

View File

@@ -76,7 +76,7 @@ func putTimeseries(ts *timeseries) {
var timeseriesPool sync.Pool
func marshalTimeseriesFast(tss []*timeseries, maxSize int, step int64) []byte {
func marshalTimeseriesFast(dst []byte, tss []*timeseries, maxSize int, step int64) []byte {
if len(tss) == 0 {
logger.Panicf("BUG: tss cannot be empty")
}
@@ -92,13 +92,13 @@ func marshalTimeseriesFast(tss []*timeseries, maxSize int, step int64) []byte {
if size > maxSize {
// Do not marshal tss, since it would occupy too much space
return nil
return dst
}
// Allocate the buffer for the marshaled tss before its marshaling.
// This should reduce memory fragmentation and memory usage.
dst := make([]byte, 0, size)
dst = marshalFastTimestamps(dst, tss[0].Timestamps)
dst = bytesutil.Resize(dst, size)
dst = marshalFastTimestamps(dst[:0], tss[0].Timestamps)
for _, ts := range tss {
dst = ts.marshalFastNoTimestamps(dst)
}

View File

@@ -74,7 +74,7 @@ func TestTimeseriesMarshalUnmarshalFast(t *testing.T) {
tssOrig = append(tssOrig, &ts)
}
buf := marshalTimeseriesFast(tssOrig, 1e6, 123)
buf := marshalTimeseriesFast(nil, tssOrig, 1e6, 123)
tssGot, err := unmarshalTimeseriesFast(buf)
if err != nil {
t.Fatalf("error in unmarshalTimeseriesFast: %s", err)

View File

@@ -63,6 +63,7 @@ var transformFuncs = map[string]transformFunc{
"label_copy": transformLabelCopy,
"label_move": transformLabelMove,
"label_transform": transformLabelTransform,
"label_value": transformLabelValue,
"union": transformUnion,
"": transformUnion, // empty func is a synonim to union
"keep_last_value": transformKeepLastValue,
@@ -308,8 +309,16 @@ func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
bbPool.Put(bb)
// Calculate quantile for each group in m
lastNonInf := func(xss []x) float64 {
for len(xss) > 0 && math.IsInf(xss[len(xss)-1].le, 0) {
lastNonInf := func(i int, xss []x) float64 {
for len(xss) > 0 {
xsLast := xss[len(xss)-1]
if xsLast.ts.Values[i] == 0 {
return nan
}
if !math.IsInf(xsLast.le, 0) {
break
}
xss = xss[:len(xss)-1]
}
if len(xss) == 0 {
@@ -318,27 +327,38 @@ func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
return xss[len(xss)-1].le
}
quantile := func(i int, phis []float64, xss []x) float64 {
vPrev := float64(0)
lePrev := float64(0)
phi := phis[i]
if math.IsNaN(phi) {
return nan
}
// Verify for broken buckets with NaN or negative values.
// Fix broken buckets.
// They are already sorted by le, so their values must be in ascending order,
// since the next bucket value includes all the previous buckets.
vPrev := float64(0)
for _, xs := range xss {
v := xs.ts.Values[i]
if math.IsNaN(v) || v < 0 {
// Broken bucket.
return nan
if math.IsNaN(v) || v < vPrev {
xs.ts.Values[i] = vPrev
} else {
vPrev = v
}
}
if len(xss) == 0 {
return nan
}
if phi < 0 {
return -inf
}
if phi > 1 {
return inf
}
vReq := xss[len(xss)-1].ts.Values[i] * phi
vLast := xss[len(xss)-1].ts.Values[i]
if vLast == 0 {
return nan
}
vReq := vLast * phi
vPrev = 0
lePrev := float64(0)
for _, xs := range xss {
v := xs.ts.Values[i]
le := xs.le
@@ -348,16 +368,16 @@ func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
continue
}
if math.IsInf(le, 0) {
return lastNonInf(xss)
return lastNonInf(i, xss)
}
if v == vPrev {
return lePrev
}
return lePrev + (le-lePrev)*(vReq-vPrev)/(v-vPrev)
}
return lastNonInf(xss)
return lastNonInf(i, xss)
}
var rvs []*timeseries
rvs := make([]*timeseries, 0, len(m))
for _, xss := range m {
sort.Slice(xss, func(i, j int) bool {
return xss[i].le < xss[j].le
@@ -881,6 +901,33 @@ func labelReplace(tss []*timeseries, srcLabel string, r *regexp.Regexp, dstLabel
return tss, nil
}
// transformLabelValue implements the `label_value(q, "label")` transform.
//
// It expects exactly two args: the input series and a string label name.
// For every input series the metric group is reset and all the values are
// overwritten with the numeric value of the given label. NaN is used when
// the label value cannot be parsed as a float.
func transformLabelValue(tfa *transformFuncArg) ([]*timeseries, error) {
	if err := expectTransformArgsNum(tfa.args, 2); err != nil {
		return nil, err
	}
	labelName, err := getString(tfa.args[1], 1)
	if err != nil {
		return nil, fmt.Errorf("cannot get label name: %s", err)
	}
	tss := tfa.args[0]
	for _, ts := range tss {
		// The metric group is reset before the label is read.
		ts.MetricName.ResetMetricGroup()
		rawValue := ts.MetricName.GetTagValue(labelName)
		f, parseErr := strconv.ParseFloat(string(rawValue), 64)
		if parseErr != nil {
			f = nan
		}
		for i := range ts.Values {
			ts.Values[i] = f
		}
	}
	// Do not remove timeseries with only NaN values, so `default` could be applied to them:
	// label_value(q, "label") default 123
	return tss, nil
}
func transformLn(v float64) float64 {
return math.Log(v)
}

View File

@@ -365,6 +365,22 @@ func registerStorageMetrics() {
return float64(m().TooSmallTimestampRows)
})
metrics.NewGauge(`vm_concurrent_addrows_limit_reached_total`, func() float64 {
return float64(m().AddRowsConcurrencyLimitReached)
})
metrics.NewGauge(`vm_concurrent_addrows_limit_timeout_total`, func() float64 {
return float64(m().AddRowsConcurrencyLimitTimeout)
})
metrics.NewGauge(`vm_concurrent_addrows_dropped_rows_total`, func() float64 {
return float64(m().AddRowsConcurrencyDroppedRows)
})
metrics.NewGauge(`vm_concurrent_addrows_capacity`, func() float64 {
return float64(m().AddRowsConcurrencyCapacity)
})
metrics.NewGauge(`vm_concurrent_addrows_current`, func() float64 {
return float64(m().AddRowsConcurrencyCurrent)
})
metrics.NewGauge(`vm_rows{type="storage/big"}`, func() float64 {
return float64(tm().BigRowsCount)
})

View File

@@ -1,5 +1,5 @@
DOCKER_NAMESPACE := victoriametrics
BUILDER_IMAGE := local/builder:go1.12.7
BUILDER_IMAGE := local/builder:go1.13.0
CERTS_IMAGE := local/certs:1.0.2
package-certs:

View File

@@ -1,2 +1,2 @@
FROM golang:1.12.7
FROM golang:1.13.0
STOPSIGNAL SIGINT

10
go.mod
View File

@@ -2,17 +2,17 @@ module github.com/VictoriaMetrics/VictoriaMetrics
require (
github.com/VictoriaMetrics/fastcache v1.5.1
github.com/VictoriaMetrics/metrics v1.7.0
github.com/VictoriaMetrics/metrics v1.7.1
github.com/cespare/xxhash/v2 v2.0.1-0.20190104013014-3767db7a7e18
github.com/golang/snappy v0.0.1
github.com/google/go-cmp v0.3.0 // indirect
github.com/klauspost/compress v1.7.4
github.com/klauspost/compress v1.7.6
github.com/spaolacci/murmur3 v1.1.0 // indirect
github.com/valyala/fastjson v1.4.1
github.com/valyala/gozstd v1.5.1
github.com/valyala/gozstd v1.6.1
github.com/valyala/histogram v1.0.1
github.com/valyala/quicktemplate v1.1.1
golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7
github.com/valyala/quicktemplate v1.2.0
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a
)
go 1.12

20
go.sum
View File

@@ -3,8 +3,8 @@ github.com/OneOfOne/xxhash v1.2.5 h1:zl/OfRA6nftbBK9qTohYBJ5xvw6C/oNKizR7cZGl3cI
github.com/OneOfOne/xxhash v1.2.5/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q=
github.com/VictoriaMetrics/fastcache v1.5.1 h1:qHgHjyoNFV7jgucU8QZUuU4gcdhfs8QW1kw68OD2Lag=
github.com/VictoriaMetrics/fastcache v1.5.1/go.mod h1:+jv9Ckb+za/P1ZRg/sulP5Ni1v49daAVERr0H3CuscE=
github.com/VictoriaMetrics/metrics v1.7.0 h1:+bdBpPEMOSgOwoQFf4KHqgeAy6xiXn/uzlrKx2YSCT8=
github.com/VictoriaMetrics/metrics v1.7.0/go.mod h1:LU2j9qq7xqZYXz8tF3/RQnB2z2MbZms5TDiIg9/NHiQ=
github.com/VictoriaMetrics/metrics v1.7.1 h1:g2qrY6Upn8rvlvR40cGHFY0crwi4hpqF0n9vJMNsCSg=
github.com/VictoriaMetrics/metrics v1.7.1/go.mod h1:LU2j9qq7xqZYXz8tF3/RQnB2z2MbZms5TDiIg9/NHiQ=
github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156 h1:eMwmnE/GDgah4HI848JfFxHt+iPb26b4zyfspmqY0/8=
github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156/go.mod h1:Cb/ax3seSYIx7SuZdm2G2xzfwmv3TPSk2ucNfQESPXM=
github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
@@ -20,8 +20,8 @@ github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.7.4 h1:4UqAIzZ1Ns2epCTyJ1d2xMWvxtX+FNSCYWeOFogK9nc=
github.com/klauspost/compress v1.7.4/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.7.6 h1:GH2karLOcuZtA5a3+KuzSU33A2cvcHGbtEWM6K4t7oU=
github.com/klauspost/compress v1.7.6/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/cpuid v0.0.0-20180405133222-e7e905edc00e/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE=
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
@@ -41,13 +41,13 @@ github.com/valyala/fastjson v1.4.1 h1:hrltpHpIpkaxll8QltMU8c3QZ5+qIiCL8yKqPFJI/y
github.com/valyala/fastjson v1.4.1/go.mod h1:nV6MsjxL2IMJQUoHDIrjEI7oLyeqK6aBD7EFWPsvP8o=
github.com/valyala/fastrand v1.0.0 h1:LUKT9aKer2dVQNUi3waewTbKV+7H17kvWFNKs2ObdkI=
github.com/valyala/fastrand v1.0.0/go.mod h1:HWqCzkrkg6QXT8V2EXWvXCoow7vLwOFN002oeRzjapQ=
github.com/valyala/gozstd v1.5.1 h1:ZLepItgu2g+B2CfVQy6KCV/as8lnJ7ef1KU6DPxQSS0=
github.com/valyala/gozstd v1.5.1/go.mod h1:oYOS+oJovjw9ewtrwEYb9+ybolEXd6pHyLMuAWN5zts=
github.com/valyala/gozstd v1.6.1 h1:oFN2mNW0kOr1fEKJuLpDwakNb6Y9fElVEBZmPEsFTUw=
github.com/valyala/gozstd v1.6.1/go.mod h1:y5Ew47GLlP37EkTB+B4s7r6A5rdaeB7ftbl9zoYiIPQ=
github.com/valyala/histogram v1.0.1 h1:FzA7n2Tz/wKRMejgu3PV1vw3htAklTjjuoI6z3d4KDg=
github.com/valyala/histogram v1.0.1/go.mod h1:lQy0xA4wUz2+IUnf97SivorsJIp8FxsnRd6x25q7Mto=
github.com/valyala/quicktemplate v1.1.1 h1:C58y/wN0FMTi2PR0n3onltemfFabany53j7M6SDDB8k=
github.com/valyala/quicktemplate v1.1.1/go.mod h1:EH+4AkTd43SvgIbQHYu59/cJyxDoOVRUAfrukLPuGJ4=
github.com/valyala/quicktemplate v1.2.0 h1:BaO1nHTkspYzmAjPXj0QiDJxai96tlcZyKcI9dyEGvM=
github.com/valyala/quicktemplate v1.2.0/go.mod h1:EH+4AkTd43SvgIbQHYu59/cJyxDoOVRUAfrukLPuGJ4=
github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio=
golang.org/x/net v0.0.0-20180911220305-26e67e76b6c3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7 h1:LepdCS8Gf/MVejFIt8lsiexZATdoGVyp5bcyS+rYoUI=
golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a h1:aYOabOQFp6Vj6W1F80affTUvO9UxmJRx8K0gsfABByQ=
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=

View File

@@ -0,0 +1,64 @@
package filestream
import (
"fmt"
"syscall"
"golang.org/x/sys/unix"
)
// adviseDontNeed accounts for n newly processed bytes and, once at least
// dontNeedBlockSize bytes are pending, advises the kernel via
// FADV_DONTNEED on the pending range rounded down to a multiple of
// dontNeedBlockSize.
//
// If fdatasync is set, the file data is synced before the advice is issued.
// Nothing is advised when st.fd is zero.
func (st *streamTracker) adviseDontNeed(n int, fdatasync bool) error {
	st.length += uint64(n)
	if st.fd == 0 || st.length < dontNeedBlockSize {
		return nil
	}

	// Only whole blocks are advised; the tail stays pending for the next call.
	tail := st.length % dontNeedBlockSize
	blockSize := st.length - tail

	fd := int(st.fd)
	if fdatasync {
		if err := unixFdatasync(fd); err != nil {
			return fmt.Errorf("unix.Fdatasync error: %s", err)
		}
	}
	err := unix.Fadvise(fd, int64(st.offset), int64(blockSize), unix.FADV_DONTNEED)
	if err != nil {
		return fmt.Errorf("unix.Fadvise(FADV_DONTNEEDED, %d, %d) error: %s", st.offset, blockSize, err)
	}
	st.offset += blockSize
	st.length = tail
	return nil
}
// close issues a final FADV_DONTNEED advice for the tracked file.
//
// It is a no-op when st.fd is zero. The descriptor itself is not closed here.
func (st *streamTracker) close() error {
	if st.fd != 0 {
		// Advise the whole file as it shouldn't be cached.
		if err := unix.Fadvise(int(st.fd), 0, 0, unix.FADV_DONTNEED); err != nil {
			return fmt.Errorf("unix.Fadvise(FADV_DONTNEEDED, 0, 0) error: %s", err)
		}
	}
	return nil
}
// unixFdatasync issues the fdatasync syscall for fd.
//
// unix.Fdatasync is missing, so it is implemented here via a raw syscall.
func unixFdatasync(fd int) error {
	if _, _, errno := unix.Syscall(unix.SYS_FDATASYNC, uintptr(fd), 0, 0); errno != 0 {
		return errnoErr(errno)
	}
	return nil
}
// errnoErr converts a raw errno into an error.
//
// Zero is mapped to nil; EAGAIN, EINVAL and ENOENT are returned as the
// corresponding syscall constants; any other errno is returned as is.
func errnoErr(e syscall.Errno) error {
	if e == 0 {
		return nil
	}
	if e == unix.EAGAIN {
		return syscall.EAGAIN
	}
	if e == unix.EINVAL {
		return syscall.EINVAL
	}
	if e == unix.ENOENT {
		return syscall.ENOENT
	}
	return e
}

111
lib/fs/dir_remover.go Normal file
View File

@@ -0,0 +1,111 @@
package fs
import (
"os"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
)
func mustRemoveAll(path string) bool {
err := os.RemoveAll(path)
if err == nil {
// Make sure the parent directory doesn't contain references
// to the current directory.
mustSyncParentDirIfExists(path)
return true
}
if !isTemporaryNFSError(err) {
logger.Panicf("FATAL: cannot remove %q: %s", path, err)
}
// NFS prevents from removing directories with open files.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/61 .
// Schedule for later directory removal.
nfsDirRemoveFailedAttempts.Inc()
select {
case removeDirCh <- path:
default:
logger.Panicf("FATAL: cannot schedule %s for removal, since the removal queue is full (%d entries)", path, cap(removeDirCh))
}
return false
}
var (
	// nfsDirRemoveFailedAttempts counts failed directory removal attempts
	// caused by temporary NFS errors.
	nfsDirRemoveFailedAttempts = metrics.NewCounter(`vm_nfs_dir_remove_failed_attempts_total`)

	// removeDirCh is the queue of paths awaiting a repeated removal attempt.
	removeDirCh = make(chan string, 1024)
)
// dirRemover is the background loop that retries removal of the paths
// queued in removeDirCh.
//
// It returns once stopDirRemover is non-zero and removeDirCh is found empty.
func dirRemover() {
	const minSleepTime = 100 * time.Millisecond
	const maxSleepTime = time.Second
	sleepTime := minSleepTime
	for {
		var path string
		select {
		case path = <-removeDirCh:
		default:
			// The queue is empty: exit if the stop has been requested,
			// otherwise poll the queue again after a short pause.
			if atomic.LoadUint64(&stopDirRemover) != 0 {
				return
			}
			time.Sleep(minSleepTime)
			continue
		}
		if mustRemoveAll(path) {
			// Successful removal resets the back-off.
			sleepTime = minSleepTime
			continue
		}
		// Couldn't remove the directory at the path because of NFS lock.
		// mustRemoveAll has already put the path back into removeDirCh.
		// Sleep for a while and try again.
		// Do not limit the amount of time required for deleting the directory,
		// since this may break on laggy NFS.
		// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/162 .
		time.Sleep(sleepTime)
		if sleepTime < maxSleepTime {
			sleepTime *= 2
		} else {
			// Log only after the back-off has reached its cap.
			logger.Errorf("failed to remove directory %q due to NFS lock; retrying later", path)
		}
	}
}
// isTemporaryNFSError reports whether the text of err looks like one of the
// temporary errors seen when removing directories on NFS.
//
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/61 for details.
func isTemporaryNFSError(err error) bool {
	msg := err.Error()
	for _, marker := range []string{"directory not empty", "device or resource busy"} {
		if strings.Contains(msg, marker) {
			return true
		}
	}
	return false
}
var (
	// dirRemoverWG is released when the dirRemover goroutine exits.
	dirRemoverWG sync.WaitGroup

	// stopDirRemover is set to a non-zero value in order to ask dirRemover to stop.
	// It is accessed via sync/atomic.
	stopDirRemover uint64
)

// init starts the background dirRemover goroutine.
func init() {
	dirRemoverWG.Add(1)
	go func() {
		dirRemover()
		dirRemoverWG.Done()
	}()
}
// MustStopDirRemover must be called at the end of graceful shutdown
// in order to wait for removing the remaining directories from removeDirCh.
//
// It is expected that nobody calls MustRemoveAll when MustStopDirRemover
// is called.
//
// logger.Panicf is called if dirRemover doesn't stop within 5 seconds.
func MustStopDirRemover() {
	const maxWaitTime = 5 * time.Second

	atomic.StoreUint64(&stopDirRemover, 1)

	stopped := make(chan struct{})
	go func() {
		defer close(stopped)
		dirRemoverWG.Wait()
	}()

	select {
	case <-time.After(maxWaitTime):
		logger.Panicf("FATAL: cannot stop dirRemover in %s", maxWaitTime)
	case <-stopped:
	}
}

View File

@@ -5,12 +5,13 @@ import (
"io"
"os"
"path/filepath"
"strings"
"time"
"regexp"
"sync/atomic"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
"golang.org/x/sys/unix"
)
// ReadAtCloser is random-access read interface.
@@ -87,26 +88,42 @@ func MustSyncPath(path string) {
}
}
// WriteFile writes data to the given file path.
var tmpFileNum uint64
// WriteFileAtomically atomically writes data to the given file path.
//
// WriteFile returns only after the file is fully written
// WriteFileAtomically returns only after the file is fully written and synced
// to the underlying storage.
func WriteFile(path string, data []byte) error {
func WriteFileAtomically(path string, data []byte) error {
// Check for the existing file. It is expected that
// the WriteFileAtomically function cannot be called concurrently
// with the same `path`.
if IsPathExist(path) {
return fmt.Errorf("cannot create file %q, since it already exists", path)
}
f, err := filestream.Create(path, false)
n := atomic.AddUint64(&tmpFileNum, 1)
tmpPath := fmt.Sprintf("%s.tmp.%d", path, n)
f, err := filestream.Create(tmpPath, false)
if err != nil {
return fmt.Errorf("cannot create file %q: %s", path, err)
return fmt.Errorf("cannot create file %q: %s", tmpPath, err)
}
if _, err := f.Write(data); err != nil {
f.MustClose()
return fmt.Errorf("cannot write %d bytes to file %q: %s", len(data), path, err)
MustRemoveAll(tmpPath)
return fmt.Errorf("cannot write %d bytes to file %q: %s", len(data), tmpPath, err)
}
// Sync and close the file.
f.MustClose()
// Atomically move the file from tmpPath to path.
if err := os.Rename(tmpPath, path); err != nil {
// do not call MustRemoveAll(tmpPath) here, so the user could inspect
// the file contents during investigating the issue.
return fmt.Errorf("cannot move %q to %q: %s", tmpPath, path, err)
}
// Sync the containing directory, so the file is guaranteed to appear in the directory.
// See https://www.quora.com/When-should-you-fsync-the-containing-directory-in-addition-to-the-file-itself
absPath, err := filepath.Abs(path)
@@ -119,6 +136,15 @@ func WriteFile(path string, data []byte) error {
return nil
}
// tmpFileNameRe matches the `.tmp.<digits>` suffix of temporary file names -
// see WriteFileAtomically for details.
var tmpFileNameRe = regexp.MustCompile(`\.tmp\.\d+$`)

// IsTemporaryFileName reports whether fn ends with the temporary file name
// suffix used by WriteFileAtomically.
func IsTemporaryFileName(fn string) bool {
	isTmp := tmpFileNameRe.MatchString(fn)
	return isTmp
}
// MkdirAllIfNotExist creates the given path dir if it doesn't exist.
func MkdirAllIfNotExist(path string) error {
if IsPathExist(path) {
@@ -220,62 +246,7 @@ func mustSyncParentDirIfExists(path string) {
//
// It properly handles NFS issue https://github.com/VictoriaMetrics/VictoriaMetrics/issues/61 .
func MustRemoveAll(path string) {
err := os.RemoveAll(path)
if err == nil {
// Make sure the parent directory doesn't contain references
// to the current directory.
mustSyncParentDirIfExists(path)
return
}
if !isTemporaryNFSError(err) {
logger.Panicf("FATAL: cannot remove %q: %s", path, err)
}
// NFS prevents from removing directories with open files.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/61 .
// Schedule for later directory removal.
select {
case removeDirCh <- path:
default:
logger.Panicf("FATAL: cannot schedule %s for removal, since the removal queue is full (%d entries)", path, cap(removeDirCh))
}
}
var removeDirCh = make(chan string, 1024)
func dirRemover() {
for path := range removeDirCh {
attempts := 0
for {
err := os.RemoveAll(path)
if err == nil {
break
}
if !isTemporaryNFSError(err) {
logger.Panicf("FATAL: cannot remove %q: %s", path, err)
}
// NFS prevents from removing directories with open files.
// Sleep for a while and try again in the hope open files will be closed.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/61 .
attempts++
if attempts > 10 {
logger.Panicf("FATAL: cannot remove %q in %d attempts: %s", path, attempts, err)
}
time.Sleep(100 * time.Millisecond)
}
// Make sure the parent directory doesn't contain references
// to the current directory.
mustSyncParentDirIfExists(path)
}
}
func isTemporaryNFSError(err error) bool {
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/61 for details.
errStr := err.Error()
return strings.Contains(errStr, "directory not empty") || strings.Contains(errStr, "device or resource busy")
}
func init() {
go dirRemover()
_ = mustRemoveAll(path)
}
// HardLinkFiles makes hard links for all the files from srcDir in dstDir.
@@ -358,3 +329,34 @@ func MustWriteData(w io.Writer, data []byte) {
logger.Panicf("BUG: writer wrote %d bytes instead of %d bytes", n, len(data))
}
}
// CreateFlockFile creates flock.lock file in the directory dir
// and returns the handler to the file.
//
// An exclusive non-blocking lock is acquired on the created file.
// An error is returned if the file cannot be created or if the lock
// cannot be acquired; in the latter case the file is closed before returning.
func CreateFlockFile(dir string) (*os.File, error) {
	flockFile := dir + "/flock.lock"
	flockF, err := os.Create(flockFile)
	if err != nil {
		return nil, fmt.Errorf("cannot create lock file %q: %s", flockFile, err)
	}
	if err := unix.Flock(int(flockF.Fd()), unix.LOCK_EX|unix.LOCK_NB); err != nil {
		// Do not leak the file descriptor when the lock cannot be acquired.
		// The Close error is ignored, since the flock error is the one to report.
		_ = flockF.Close()
		return nil, fmt.Errorf("cannot acquire lock on file %q: %s", flockFile, err)
	}
	return flockF, nil
}
// MustGetFreeSpace returns free space for the given directory path.
//
// The value is calculated as Bavail*Bsize from the Fstatfs result.
// logger.Panicf is called if the path cannot be opened or inspected.
func MustGetFreeSpace(path string) uint64 {
	dir, err := os.Open(path)
	if err != nil {
		logger.Panicf("FATAL: cannot determine free disk space on %q: %s", path, err)
	}
	defer MustClose(dir)

	var st unix.Statfs_t
	if err := unix.Fstatfs(int(dir.Fd()), &st); err != nil {
		logger.Panicf("FATAL: cannot determine free disk space on %q: %s", path, err)
	}
	return uint64(st.Bavail) * uint64(st.Bsize)
}

24
lib/fs/fs_test.go Normal file
View File

@@ -0,0 +1,24 @@
package fs
import (
"testing"
)
// TestIsTemporaryFileName verifies IsTemporaryFileName against file names
// with and without the `.tmp.<digits>` suffix.
func TestIsTemporaryFileName(t *testing.T) {
	cases := []struct {
		s              string
		resultExpected bool
	}{
		{"", false},
		{".", false},
		{".tmp", false},
		{"tmp.123", false},
		{".tmp.123.xx", false},
		{".tmp.1", true},
		{"asdf.dff.tmp.123", true},
		{"asdf.sdfds.tmp.dfd", false},
		{"dfd.sdfds.dfds.1232", false},
	}
	for _, tc := range cases {
		if result := IsTemporaryFileName(tc.s); result != tc.resultExpected {
			t.Fatalf("unexpected IsTemporaryFileName(%q); got %v; want %v", tc.s, result, tc.resultExpected)
		}
	}
}

View File

@@ -423,7 +423,29 @@ var (
func Errorf(w http.ResponseWriter, format string, args ...interface{}) {
errStr := fmt.Sprintf(format, args...)
logger.Errorf("%s", errStr)
http.Error(w, errStr, http.StatusBadRequest)
// Extract statusCode from args
statusCode := http.StatusBadRequest
for _, arg := range args {
if esc, ok := arg.(*ErrorWithStatusCode); ok {
statusCode = esc.StatusCode
break
}
}
http.Error(w, errStr, statusCode)
}
// ErrorWithStatusCode is error with HTTP status code.
//
// The given StatusCode is sent to client when the error is passed to Errorf.
type ErrorWithStatusCode struct {
	// Err is the wrapped error. It must be non-nil when Error is called.
	Err error

	// StatusCode is the HTTP status code associated with Err.
	StatusCode int
}

// Error implements error interface.
//
// It returns the message of the wrapped Err.
func (e *ErrorWithStatusCode) Error() string {
	return e.Err.Error()
}

// Unwrap returns the wrapped Err, so it can be inspected
// with errors.Is and errors.As.
func (e *ErrorWithStatusCode) Unwrap() error {
	return e.Err
}
func isTrivialNetworkError(err error) bool {

View File

@@ -10,27 +10,41 @@ import (
var allowedMemPercent = flag.Float64("memory.allowedPercent", 60, "Allowed percent of system memory VictoriaMetrics caches may occupy")
var allowedMemory int
var (
allowedMemory int
remainingMemory int
)
var once sync.Once
// initOnce calculates allowedMemory and remainingMemory from the total system
// memory and the -memory.allowedPercent flag.
//
// It panics if flags aren't parsed yet or if the flag value is outside [10...200].
func initOnce() {
	if !flag.Parsed() {
		// Do not use logger.Panicf here, since logger may be uninitialized yet.
		panic(fmt.Errorf("BUG: memory.Allowed must be called only after flag.Parse call"))
	}
	pct := *allowedMemPercent
	if pct < 10 || pct > 200 {
		logger.Panicf("FATAL: -memory.allowedPercent must be in the range [10...200]; got %f", pct)
	}
	fraction := pct / 100
	totalMem := sysTotalMemory()
	allowedMemory = int(float64(totalMem) * fraction)
	remainingMemory = totalMem - allowedMemory
	logger.Infof("limiting caches to %d bytes, leaving %d bytes to the OS according to -memory.allowedPercent=%g", allowedMemory, remainingMemory, pct)
}
// Allowed returns the amount of system memory allowed to use by the app.
//
// The function must be called only after flag.Parse is called.
func Allowed() int {
once.Do(func() {
if !flag.Parsed() {
// Do not use logger.Panicf here, since logger may be uninitialized yet.
panic(fmt.Errorf("BUG: memory.Allowed must be called only after flag.Parse call"))
}
if *allowedMemPercent < 10 || *allowedMemPercent > 200 {
logger.Panicf("FATAL: -memory.allowedPercent must be in the range [10...200]; got %f", *allowedMemPercent)
}
percent := *allowedMemPercent / 100
mem := sysTotalMemory()
allowedMemory = int(float64(mem) * percent)
logger.Infof("limiting caches to %d bytes of RAM according to -memory.allowedPercent=%g", allowedMemory, *allowedMemPercent)
})
once.Do(initOnce)
return allowedMemory
}
// Remaining returns the amount of memory remaining to the OS.
//
// The value is calculated once by initOnce on the first call
// to Allowed or Remaining.
//
// This function must be called only after flag.Parse is called.
func Remaining() int {
	once.Do(initOnce)
	return remainingMemory
}

17
lib/memory/memory_bsd.go Normal file
View File

@@ -0,0 +1,17 @@
// +build freebsd openbsd dragonfly netbsd
package memory
import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
// sysTotalMemory returns the value of the `hw.physmem` sysctl.
//
// logger.Panicf is called if the sysctl cannot be read.
//
// This code has been adapted from https://github.com/pbnjay/memory
func sysTotalMemory() int {
	physmem, err := sysctlUint64("hw.physmem")
	if err != nil {
		logger.Panicf("FATAL: cannot determine system memory: %s", err)
	}
	return int(physmem)
}

View File

@@ -1,9 +1,6 @@
package memory
import (
"syscall"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
@@ -15,16 +12,3 @@ func sysTotalMemory() int {
}
return int(s)
}
func sysctlUint64(name string) (uint64, error) {
s, err := syscall.Sysctl(name)
if err != nil {
return 0, err
}
// hack because the string conversion above drops a \0
b := []byte(s)
if len(b) < 8 {
b = append(b, 0)
}
return *(*uint64)(unsafe.Pointer(&b[0])), nil
}

22
lib/memory/sysctl.go Normal file
View File

@@ -0,0 +1,22 @@
// +build darwin freebsd openbsd dragonfly netbsd
package memory
import (
"syscall"
"unsafe"
)
// sysctlUint64 returns the value of the sysctl with the given name,
// interpreted as a uint64 in native byte order.
//
// This has been adapted from github.com/pbnjay/memory.
func sysctlUint64(name string) (uint64, error) {
	s, err := syscall.Sysctl(name)
	if err != nil {
		return 0, err
	}
	// The string conversion inside syscall.Sysctl drops a trailing \0,
	// so s may be shorter than 8 bytes. Copy the available bytes into
	// a zeroed uint64 instead of reading 8 bytes directly from the string data.
	// This never reads past the end of s, and at most the first 8 bytes are used.
	var v uint64
	copy((*[8]byte)(unsafe.Pointer(&v))[:], s)
	return v, nil
}

View File

@@ -164,7 +164,7 @@ func (ph *partHeader) WriteMetadata(partPath string) error {
return fmt.Errorf("cannot marshal metadata: %s", err)
}
metadataPath := partPath + "/metadata.json"
if err := fs.WriteFile(metadataPath, metadata); err != nil {
if err := fs.WriteFileAtomically(metadataPath, metadata); err != nil {
return fmt.Errorf("cannot create %q: %s", metadataPath, err)
}
return nil

View File

@@ -16,7 +16,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/syncwg"
"golang.org/x/sys/unix"
)
// maxParts is the maximum number of parts in the table.
@@ -60,6 +59,8 @@ const rawItemsFlushInterval = time.Second
type Table struct {
path string
flushCallback func()
partsLock sync.Mutex
parts []*partWrapper
@@ -122,8 +123,11 @@ func (pw *partWrapper) decRef() {
// OpenTable opens a table on the given path.
//
// Optional flushCallback is called every time new data batch is flushed
// to the underlying storage and becomes visible to search.
//
// The table is created if it doesn't exist yet.
func OpenTable(path string) (*Table, error) {
func OpenTable(path string, flushCallback func()) (*Table, error) {
path = filepath.Clean(path)
logger.Infof("opening table %q...", path)
startTime := time.Now()
@@ -134,13 +138,9 @@ func OpenTable(path string) (*Table, error) {
}
// Protect from concurrent opens.
flockFile := path + "/flock.lock"
flockF, err := os.Create(flockFile)
flockF, err := fs.CreateFlockFile(path)
if err != nil {
return nil, fmt.Errorf("cannot create lock file %q: %s", flockFile, err)
}
if err := unix.Flock(int(flockF.Fd()), unix.LOCK_EX|unix.LOCK_NB); err != nil {
return nil, fmt.Errorf("cannot acquire lock on file %q: %s", flockFile, err)
return nil, err
}
// Open table parts.
@@ -150,11 +150,12 @@ func OpenTable(path string) (*Table, error) {
}
tb := &Table{
path: path,
parts: pws,
mergeIdx: uint64(time.Now().UnixNano()),
flockF: flockF,
stopCh: make(chan struct{}),
path: path,
flushCallback: flushCallback,
parts: pws,
mergeIdx: uint64(time.Now().UnixNano()),
flockF: flockF,
stopCh: make(chan struct{}),
}
tb.startPartMergers()
tb.startRawItemsFlusher()
@@ -449,6 +450,9 @@ func (tb *Table) mergeRawItemsBlocks(blocksToMerge []*inmemoryBlock) {
if err := tb.mergeParts(pws, nil, true); err != nil {
logger.Panicf("FATAL: cannot merge raw parts: %s", err)
}
if tb.flushCallback != nil {
tb.flushCallback()
}
}
for {
@@ -715,7 +719,7 @@ func (tb *Table) mergeParts(pws []*partWrapper, stopCh <-chan struct{}, isOuterP
dstPartPath := ph.Path(tb.path, mergeIdx)
fmt.Fprintf(&bb, "%s -> %s\n", tmpPartPath, dstPartPath)
txnPath := fmt.Sprintf("%s/txn/%016X", tb.path, mergeIdx)
if err := fs.WriteFile(txnPath, bb.B); err != nil {
if err := fs.WriteFileAtomically(txnPath, bb.B); err != nil {
return fmt.Errorf("cannot create transaction file %q: %s", txnPath, err)
}
@@ -808,19 +812,7 @@ func (tb *Table) maxOutPartItems() uint64 {
}
func (tb *Table) maxOutPartItemsSlow() uint64 {
// Determine the amount of free space on tb.path.
d, err := os.Open(tb.path)
if err != nil {
logger.Panicf("FATAL: cannot determine free disk space on %q: %s", tb.path, err)
}
defer fs.MustClose(d)
fd := d.Fd()
var stat unix.Statfs_t
if err := unix.Fstatfs(int(fd), &stat); err != nil {
logger.Panicf("FATAL: cannot determine free disk space on %q: %s", tb.path, err)
}
freeSpace := stat.Bavail * uint64(stat.Bsize)
freeSpace := fs.MustGetFreeSpace(tb.path)
// Calculate the maximum number of items in the output merge part
// by dividing the freeSpace by 4 and by the number of concurrent
@@ -994,7 +986,12 @@ func runTransactions(txnLock *sync.RWMutex, path string) error {
})
for _, fi := range fis {
txnPath := txnDir + "/" + fi.Name()
fn := fi.Name()
if fs.IsTemporaryFileName(fn) {
// Skip temporary files, which could be left after unclean shutdown.
continue
}
txnPath := txnDir + "/" + fn
if err := runTransaction(txnLock, path, txnPath); err != nil {
return fmt.Errorf("cannot run transaction from %q: %s", txnPath, err)
}

View File

@@ -5,6 +5,7 @@ import (
"math/rand"
"os"
"sort"
"sync/atomic"
"testing"
"time"
)
@@ -39,7 +40,7 @@ func TestTableSearchSerial(t *testing.T) {
func() {
// Re-open the table and verify the search works.
tb, err := OpenTable(path)
tb, err := OpenTable(path, nil)
if err != nil {
t.Fatalf("cannot open table: %s", err)
}
@@ -74,7 +75,7 @@ func TestTableSearchConcurrent(t *testing.T) {
// Re-open the table and verify the search works.
func() {
tb, err := OpenTable(path)
tb, err := OpenTable(path, nil)
if err != nil {
t.Fatalf("cannot open table: %s", err)
}
@@ -146,7 +147,11 @@ func testTableSearchSerial(tb *Table, items []string) error {
}
func newTestTable(path string, itemsCount int) (*Table, []string, error) {
tb, err := OpenTable(path)
var flushes uint64
flushCallback := func() {
atomic.AddUint64(&flushes, 1)
}
tb, err := OpenTable(path, flushCallback)
if err != nil {
return nil, nil, fmt.Errorf("cannot open table: %s", err)
}
@@ -159,6 +164,9 @@ func newTestTable(path string, itemsCount int) (*Table, []string, error) {
items[i] = item
}
tb.DebugFlush()
if itemsCount > 0 && atomic.LoadUint64(&flushes) == 0 {
return nil, nil, fmt.Errorf("unexpeted zero flushes for itemsCount=%d", itemsCount)
}
sort.Strings(items)
return tb, items, nil

View File

@@ -32,7 +32,7 @@ func benchmarkTableSearch(b *testing.B, itemsCount int) {
// Force finishing pending merges
tb.MustClose()
tb, err = OpenTable(path)
tb, err = OpenTable(path, nil)
if err != nil {
b.Fatalf("unexpected error when re-opening table %q: %s", path, err)
}

View File

@@ -5,6 +5,7 @@ import (
"fmt"
"os"
"sync"
"sync/atomic"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
@@ -20,7 +21,7 @@ func TestTableOpenClose(t *testing.T) {
}()
// Create a new table
tb, err := OpenTable(path)
tb, err := OpenTable(path, nil)
if err != nil {
t.Fatalf("cannot create new table: %s", err)
}
@@ -30,7 +31,7 @@ func TestTableOpenClose(t *testing.T) {
// Re-open created table multiple times.
for i := 0; i < 10; i++ {
tb, err := OpenTable(path)
tb, err := OpenTable(path, nil)
if err != nil {
t.Fatalf("cannot open created table: %s", err)
}
@@ -44,14 +45,14 @@ func TestTableOpenMultipleTimes(t *testing.T) {
_ = os.RemoveAll(path)
}()
tb1, err := OpenTable(path)
tb1, err := OpenTable(path, nil)
if err != nil {
t.Fatalf("cannot open table: %s", err)
}
defer tb1.MustClose()
for i := 0; i < 10; i++ {
tb2, err := OpenTable(path)
tb2, err := OpenTable(path, nil)
if err == nil {
tb2.MustClose()
t.Fatalf("expecting non-nil error when opening already opened table")
@@ -68,7 +69,11 @@ func TestTableAddItemSerial(t *testing.T) {
_ = os.RemoveAll(path)
}()
tb, err := OpenTable(path)
var flushes uint64
flushCallback := func() {
atomic.AddUint64(&flushes, 1)
}
tb, err := OpenTable(path, flushCallback)
if err != nil {
t.Fatalf("cannot open %q: %s", path, err)
}
@@ -78,6 +83,9 @@ func TestTableAddItemSerial(t *testing.T) {
// Verify items count after pending items flush.
tb.DebugFlush()
if atomic.LoadUint64(&flushes) == 0 {
t.Fatalf("unexpected zero flushes")
}
var m TableMetrics
tb.UpdateMetrics(&m)
@@ -91,7 +99,7 @@ func TestTableAddItemSerial(t *testing.T) {
testReopenTable(t, path, itemsCount)
// Add more items in order to verify merge between inmemory parts and file-based parts.
tb, err = OpenTable(path)
tb, err = OpenTable(path, nil)
if err != nil {
t.Fatalf("cannot open %q: %s", path, err)
}
@@ -124,7 +132,7 @@ func TestTableCreateSnapshotAt(t *testing.T) {
_ = os.RemoveAll(path)
}()
tb, err := OpenTable(path)
tb, err := OpenTable(path, nil)
if err != nil {
t.Fatalf("cannot open %q: %s", path, err)
}
@@ -155,13 +163,13 @@ func TestTableCreateSnapshotAt(t *testing.T) {
}()
// Verify snapshots contain all the data.
tb1, err := OpenTable(snapshot1)
tb1, err := OpenTable(snapshot1, nil)
if err != nil {
t.Fatalf("cannot open %q: %s", path, err)
}
defer tb1.MustClose()
tb2, err := OpenTable(snapshot2)
tb2, err := OpenTable(snapshot2, nil)
if err != nil {
t.Fatalf("cannot open %q: %s", path, err)
}
@@ -205,7 +213,11 @@ func TestTableAddItemsConcurrent(t *testing.T) {
_ = os.RemoveAll(path)
}()
tb, err := OpenTable(path)
var flushes uint64
flushCallback := func() {
atomic.AddUint64(&flushes, 1)
}
tb, err := OpenTable(path, flushCallback)
if err != nil {
t.Fatalf("cannot open %q: %s", path, err)
}
@@ -215,6 +227,10 @@ func TestTableAddItemsConcurrent(t *testing.T) {
// Verify items count after pending items flush.
tb.DebugFlush()
if atomic.LoadUint64(&flushes) == 0 {
t.Fatalf("unexpected zero flushes")
}
var m TableMetrics
tb.UpdateMetrics(&m)
if m.ItemsCount != itemsCount {
@@ -227,7 +243,7 @@ func TestTableAddItemsConcurrent(t *testing.T) {
testReopenTable(t, path, itemsCount)
// Add more items in order to verify merge between inmemory parts and file-based parts.
tb, err = OpenTable(path)
tb, err = OpenTable(path, nil)
if err != nil {
t.Fatalf("cannot open %q: %s", path, err)
}
@@ -269,7 +285,7 @@ func testReopenTable(t *testing.T, path string, itemsCount int) {
t.Helper()
for i := 0; i < 10; i++ {
tb, err := OpenTable(path)
tb, err := OpenTable(path, nil)
if err != nil {
t.Fatalf("cannot re-open %q: %s", path, err)
}

View File

@@ -31,7 +31,7 @@ func (m *WriteRequest) Unmarshal(dAtA []byte) error {
}
b := dAtA[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
wire |= uint64(b&0x7F) << shift
if b < 0x80 {
break
}
@@ -118,7 +118,7 @@ func skipRemote(dAtA []byte) (n int, err error) {
}
b := dAtA[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
wire |= uint64(b&0x7F) << shift
if b < 0x80 {
break
}
@@ -176,7 +176,7 @@ func skipRemote(dAtA []byte) (n int, err error) {
}
b := dAtA[iNdEx]
iNdEx++
innerWire |= (uint64(b) & 0x7F) << shift
innerWire |= uint64(b&0x7F) << shift
if b < 0x80 {
break
}

View File

@@ -43,7 +43,7 @@ func (m *Sample) Unmarshal(dAtA []byte) error {
}
b := dAtA[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
wire |= uint64(b&0x7F) << shift
if b < 0x80 {
break
}
@@ -82,7 +82,7 @@ func (m *Sample) Unmarshal(dAtA []byte) error {
}
b := dAtA[iNdEx]
iNdEx++
m.Timestamp |= (int64(b) & 0x7F) << shift
m.Timestamp |= int64(b&0x7F) << shift
if b < 0x80 {
break
}
@@ -128,7 +128,7 @@ func (m *TimeSeries) Unmarshal(dAtA []byte, dstLabels []Label, dstSamples []Samp
}
b := dAtA[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
wire |= uint64(b&0x7F) << shift
if b < 0x80 {
break
}
@@ -255,7 +255,7 @@ func (m *Label) Unmarshal(dAtA []byte) error {
}
b := dAtA[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
wire |= uint64(b&0x7F) << shift
if b < 0x80 {
break
}
@@ -283,7 +283,7 @@ func (m *Label) Unmarshal(dAtA []byte) error {
}
b := dAtA[iNdEx]
iNdEx++
stringLen |= (uint64(b) & 0x7F) << shift
stringLen |= uint64(b&0x7F) << shift
if b < 0x80 {
break
}
@@ -312,7 +312,7 @@ func (m *Label) Unmarshal(dAtA []byte) error {
}
b := dAtA[iNdEx]
iNdEx++
stringLen |= (uint64(b) & 0x7F) << shift
stringLen |= uint64(b&0x7F) << shift
if b < 0x80 {
break
}
@@ -363,7 +363,7 @@ func skipTypes(dAtA []byte) (n int, err error) {
}
b := dAtA[iNdEx]
iNdEx++
wire |= (uint64(b) & 0x7F) << shift
wire |= uint64(b&0x7F) << shift
if b < 0x80 {
break
}
@@ -421,7 +421,7 @@ func skipTypes(dAtA []byte) (n int, err error) {
}
b := dAtA[iNdEx]
iNdEx++
innerWire |= (uint64(b) & 0x7F) << shift
innerWire |= uint64(b&0x7F) << shift
if b < 0x80 {
break
}

View File

@@ -205,19 +205,6 @@ func (b *Block) MarshalData(timestampsBlockOffset, valuesBlockOffset uint64) ([]
b.bh.ValuesBlockSize = uint32(len(b.valuesData))
b.values = b.values[:0]
if len(timestamps) > 1 && (b.bh.ValuesMarshalType == encoding.MarshalTypeConst || b.bh.ValuesMarshalType == encoding.MarshalTypeDeltaConst) {
// Special case - values are constant or are changed with constant rate.
// In this case we may 'cheat' by assuming timestamps are changed
// at ideal constant rate. This improves timestamps' compression rate.
minTimestamp := timestamps[0]
maxTimestamp := timestamps[len(timestamps)-1]
delta := (maxTimestamp - minTimestamp) / int64(len(timestamps)-1)
ts := minTimestamp
for i := 1; i < len(timestamps); i++ {
ts += delta
timestamps[i] = ts
}
}
b.timestampsData, b.bh.TimestampsMarshalType, b.bh.MinTimestamp = encoding.MarshalTimestamps(b.timestampsData[:0], timestamps, b.bh.PrecisionBits)
b.bh.TimestampsBlockOffset = timestampsBlockOffset
b.bh.TimestampsBlockSize = uint32(len(b.timestampsData))

View File

@@ -181,6 +181,10 @@ func unmarshalBlockHeaders(dst []blockHeader, src []byte, blockHeadersCount int)
logger.Panicf("BUG: blockHeadersCount must be greater than zero; got %d", blockHeadersCount)
}
dstLen := len(dst)
if n := dstLen + blockHeadersCount - cap(dst); n > 0 {
dst = append(dst[:cap(dst)], make([]blockHeader, n)...)
dst = dst[:dstLen]
}
var bh blockHeader
for len(src) > 0 {
tmp, err := bh.Unmarshal(src)

View File

@@ -18,6 +18,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/mergeset"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
"github.com/VictoriaMetrics/fastcache"
xxhash "github.com/cespare/xxhash/v2"
)
@@ -52,17 +53,17 @@ type indexDB struct {
extDBLock sync.Mutex
// Cache for fast TagFilters -> TSIDs lookup.
tagCache *fastcache.Cache
tagCache *workingsetcache.Cache
// Cache for fast MetricID -> TSID lookup.
metricIDCache *fastcache.Cache
metricIDCache *workingsetcache.Cache
// Cache for fast MetricID -> MetricName lookup.
metricNameCache *fastcache.Cache
metricNameCache *workingsetcache.Cache
// Cache holding useless TagFilters entries, which have no tag filters
// matching low number of metrics.
uselessTagFiltersCache *fastcache.Cache
uselessTagFiltersCache *workingsetcache.Cache
indexSearchPool sync.Pool
@@ -101,7 +102,7 @@ type indexDB struct {
}
// openIndexDB opens index db from the given path with the given caches.
func openIndexDB(path string, metricIDCache, metricNameCache *fastcache.Cache, currHourMetricIDs, prevHourMetricIDs *atomic.Value) (*indexDB, error) {
func openIndexDB(path string, metricIDCache, metricNameCache *workingsetcache.Cache, currHourMetricIDs, prevHourMetricIDs *atomic.Value) (*indexDB, error) {
if metricIDCache == nil {
logger.Panicf("BUG: metricIDCache must be non-nil")
}
@@ -115,7 +116,7 @@ func openIndexDB(path string, metricIDCache, metricNameCache *fastcache.Cache, c
logger.Panicf("BUG: prevHourMetricIDs must be non-nil")
}
tb, err := mergeset.OpenTable(path)
tb, err := mergeset.OpenTable(path, invalidateTagCache)
if err != nil {
return nil, fmt.Errorf("cannot open indexDB %q: %s", path, err)
}
@@ -130,10 +131,10 @@ func openIndexDB(path string, metricIDCache, metricNameCache *fastcache.Cache, c
tb: tb,
name: name,
tagCache: fastcache.New(mem / 32),
tagCache: workingsetcache.New(mem/32, time.Hour),
metricIDCache: metricIDCache,
metricNameCache: metricNameCache,
uselessTagFiltersCache: fastcache.New(mem / 128),
uselessTagFiltersCache: workingsetcache.New(mem/128, time.Hour),
currHourMetricIDs: currHourMetricIDs,
prevHourMetricIDs: prevHourMetricIDs,
@@ -273,8 +274,8 @@ func (db *indexDB) decRef() {
db.SetExtDB(nil)
// Free space occupied by caches owned by db.
db.tagCache.Reset()
db.uselessTagFiltersCache.Reset()
db.tagCache.Stop()
db.uselessTagFiltersCache.Stop()
db.tagCache = nil
db.metricIDCache = nil
@@ -291,20 +292,36 @@ func (db *indexDB) decRef() {
}
func (db *indexDB) getFromTagCache(key []byte) ([]TSID, bool) {
value := db.tagCache.GetBig(nil, key)
if len(value) == 0 {
compressedBuf := tagBufPool.Get()
defer tagBufPool.Put(compressedBuf)
compressedBuf.B = db.tagCache.GetBig(compressedBuf.B[:0], key)
if len(compressedBuf.B) == 0 {
return nil, false
}
tsids, err := unmarshalTSIDs(nil, value)
buf := tagBufPool.Get()
defer tagBufPool.Put(buf)
var err error
buf.B, err = encoding.DecompressZSTD(buf.B[:0], compressedBuf.B)
if err != nil {
logger.Panicf("FATAL: cannot decompress tsids from tagCache: %s", err)
}
tsids, err := unmarshalTSIDs(nil, buf.B)
if err != nil {
logger.Panicf("FATAL: cannot unmarshal tsids from tagCache: %s", err)
}
return tsids, true
}
var tagBufPool bytesutil.ByteBufferPool
func (db *indexDB) putToTagCache(tsids []TSID, key []byte) {
value := marshalTSIDs(nil, tsids)
db.tagCache.SetBig(key, value)
buf := tagBufPool.Get()
buf.B = marshalTSIDs(buf.B[:0], tsids)
compressedBuf := tagBufPool.Get()
compressedBuf.B = encoding.CompressZSTDLevel(compressedBuf.B[:0], buf.B, 1)
tagBufPool.Put(buf)
db.tagCache.SetBig(key, compressedBuf.B)
tagBufPool.Put(compressedBuf)
}
func (db *indexDB) getFromMetricIDCache(dst *TSID, metricID uint64) error {
@@ -388,7 +405,7 @@ func unmarshalTSIDs(dst []TSID, src []byte) ([]TSID, error) {
return dst, nil
}
func (db *indexDB) invalidateTagCache() {
func invalidateTagCache() {
// This function must be fast, since it is called each
// time new timeseries is added.
atomic.AddUint64(&tagFiltersKeyGen, 1)
@@ -496,8 +513,8 @@ func (db *indexDB) createTSIDByName(dst *TSID, metricName []byte) error {
return fmt.Errorf("cannot create indexes: %s", err)
}
// Invalidate tag cache, since it doesn't contain tags for the created mn -> TSID mapping.
db.invalidateTagCache()
// There is no need to invalidate the tag cache here, since it is invalidated
// on db.tb flush via the invalidateTagCache flushCallback passed to OpenTable.
return nil
}
@@ -873,7 +890,10 @@ func (db *indexDB) DeleteTSIDs(tfss []*TagFilters) (int, error) {
db.updateDeletedMetricIDs(metricIDs)
// Reset TagFilters -> TSIDS cache, since it may contain deleted TSIDs.
db.invalidateTagCache()
invalidateTagCache()
// Do not reset uselessTagFiltersCache, since the found metricIDs
// on cache miss are filtered out later with deletedMetricIDs.
// Delete TSIDs in the extDB.
if db.doExtDB(func(extDB *indexDB) {
@@ -974,7 +994,8 @@ func (db *indexDB) searchTSIDs(tfss []*TagFilters, tr TimeRange, maxMetrics int)
extTSIDs, err = is.searchTSIDs(tfss, tr, maxMetrics)
extDB.putIndexSearch(is)
db.putToTagCache(tsids, tfKeyExtBuf.B)
sort.Slice(extTSIDs, func(i, j int) bool { return extTSIDs[i].Less(&extTSIDs[j]) })
extDB.putToTagCache(extTSIDs, tfKeyExtBuf.B)
}) {
if err != nil {
return nil, err
@@ -1218,6 +1239,82 @@ func (is *indexSearch) updateMetricIDsByMetricNameMatch(metricIDs, srcMetricIDs
return nil
}
// getTagFilterWithMinMetricIDsCountOptimized picks the starting point for a search over tfs.
//
// On success it returns either the tag filter and metricIDs reported by
// getTagFilterWithMinMetricIDsCountAdaptive, or a nil tag filter together
// with the metricIDs found on the time range tr.
func (is *indexSearch) getTagFilterWithMinMetricIDsCountOptimized(tfs *TagFilters, tr TimeRange, maxMetrics int) (*tagFilter, map[uint64]struct{}, error) {
	// Fast path: probe the tag filters with the limit returned by adjustMaxMetricsAdaptive.
	adjustedLimit := is.adjustMaxMetricsAdaptive(tr, maxMetrics)
	tf, ids, err := is.getTagFilterWithMinMetricIDsCountAdaptive(tfs, adjustedLimit)
	switch err {
	case nil:
		return tf, ids, nil
	case errTooManyMetrics:
		// All the tag filters match too many metrics; fall through to the slow path below.
	default:
		return nil, nil, err
	}

	// Slow path: try filtering the matching metrics by time range.
	// This should work well when old metrics are constantly replaced
	// by a big number of new metrics. For example, prometheus-operator
	// creates many new metrics for each new deployment.
	//
	// Up to 20*maxMetrics metrics are allowed for the given time range
	// in the hope these metricIDs will be filtered out by other filters later.
	// One extra metricID is requested in order to detect the overflow.
	timeRangeLimit := 20 * maxMetrics
	timeRangeIDs, err := is.getMetricIDsForTimeRange(tr, timeRangeLimit+1)
	switch {
	case err == errMissingMetricIDsForDate:
		// Slow path: try to find the tag filter without maxMetrics adjustment.
		tf, ids, err = is.getTagFilterWithMinMetricIDsCountAdaptive(tfs, maxMetrics)
		if err == errTooManyMetrics {
			return nil, nil, fmt.Errorf("cannot find tag filter matching less than %d time series; "+
				"either increase -search.maxUniqueTimeseries or use more specific tag filters", maxMetrics)
		}
		if err != nil {
			return nil, nil, err
		}
		return tf, ids, nil
	case err != nil:
		return nil, nil, err
	case len(timeRangeIDs) <= timeRangeLimit:
		// The time range holds few enough metricIDs; start from them without a tag filter.
		return nil, timeRangeIDs, nil
	}

	// Slow path: try to select the tag filter without maxMetrics adjustment.
	tf, ids, err = is.getTagFilterWithMinMetricIDsCountAdaptive(tfs, maxMetrics)
	if err == errTooManyMetrics {
		return nil, nil, fmt.Errorf("more than %d time series found on the time range %s; either increase -search.maxUniqueTimeseries or shrink the time range",
			maxMetrics, tr.String())
	}
	if err != nil {
		return nil, nil, err
	}
	return tf, ids, nil
}
const maxDaysForDateMetricIDs = 40
// adjustMaxMetricsAdaptive returns the metrics limit to use for the fast path
// of a tag filters search on the time range tr.
//
// The result is either maxMetrics itself or a quarter of the number
// of metricIDs registered in the previous hour.
func (is *indexSearch) adjustMaxMetricsAdaptive(tr TimeRange, maxMetrics int) int {
	firstDay := uint64(tr.MinTimestamp) / msecPerDay
	lastDay := uint64(tr.MaxTimestamp) / msecPerDay
	if lastDay-firstDay > maxDaysForDateMetricIDs {
		// The time range is too wide: extracting metricIDs for it is expensive,
		// so maxMetrics cannot be reduced.
		return maxMetrics
	}

	prevHour := is.db.prevHourMetricIDs.Load().(*hourMetricIDs)
	if !prevHour.isFull {
		return maxMetrics
	}

	// It is cheaper to filter on the hour or day metrics if the minimum
	// number of matching metrics across tfs exceeds a quarter of the hour metrics.
	quarter := len(prevHour.m) / 4
	if len(prevHour.m) >= 256 && maxMetrics > quarter {
		return quarter
	}
	return maxMetrics
}
func (is *indexSearch) getTagFilterWithMinMetricIDsCountAdaptive(tfs *TagFilters, maxMetrics int) (*tagFilter, map[uint64]struct{}, error) {
kb := &is.kb
kb.B = append(kb.B[:0], uselessMultiTagFiltersKeyPrefix)
@@ -1266,29 +1363,6 @@ func (is *indexSearch) getTagFilterWithMinMetricIDsCountAdaptive(tfs *TagFilters
var errTooManyMetrics = errors.New("all the tag filters match too many metrics")
const maxDaysForDateMetricIDs = 40
func (is *indexSearch) adjustMaxMetricsAdaptive(tr TimeRange, maxMetrics int) int {
minDate := uint64(tr.MinTimestamp) / msecPerDay
maxDate := uint64(tr.MaxTimestamp) / msecPerDay
if maxDate-minDate > maxDaysForDateMetricIDs {
// Cannot reduce maxMetrics for the given time range,
// since the it is expensive extracting metricIDs for the given tr.
return maxMetrics
}
hmPrev := is.db.prevHourMetricIDs.Load().(*hourMetricIDs)
if !hmPrev.isFull {
return maxMetrics
}
hourMetrics := len(hmPrev.m)
if hourMetrics >= 256 && maxMetrics > hourMetrics/4 {
// It is cheaper to filter on the hour or day metrics if the minimum
// number of matching metrics across tfs exceeds hourMetrics / 4.
return hourMetrics / 4
}
return maxMetrics
}
func (is *indexSearch) getTagFilterWithMinMetricIDsCount(tfs *TagFilters, maxMetrics int) (*tagFilter, map[uint64]struct{}, error) {
var minMetricIDs map[uint64]struct{}
var minTf *tagFilter
@@ -1463,37 +1537,9 @@ func (is *indexSearch) updateMetricIDsForTagFilters(metricIDs map[uint64]struct{
// Sort tag filters for faster ts.Seek below.
sort.Slice(tfs.tfs, func(i, j int) bool { return bytes.Compare(tfs.tfs[i].prefix, tfs.tfs[j].prefix) < 0 })
maxMetricsAdjusted := is.adjustMaxMetricsAdaptive(tr, maxMetrics)
minTf, minMetricIDs, err := is.getTagFilterWithMinMetricIDsCountAdaptive(tfs, maxMetricsAdjusted)
minTf, minMetricIDs, err := is.getTagFilterWithMinMetricIDsCountOptimized(tfs, tr, maxMetrics)
if err != nil {
if err != errTooManyMetrics {
return err
}
// All the tag filters match too many metrics.
// Slow path: try filtering the matching metrics by time range.
// This should work well for cases when old metrics are constantly substituted
// by big number of new metrics. For example, prometheus-operator creates many new
// metrics for each new deployment.
//
// Allow fetching up to 20*maxMetrics metrics for the given time range
// in the hope these metricIDs will be filtered out by other filters below.
maxTimeRangeMetrics := 20 * maxMetrics
metricIDsForTimeRange, err := is.getMetricIDsForTimeRange(tr, maxTimeRangeMetrics+1)
if err == errMissingMetricIDsForDate {
return fmt.Errorf("cannot find tag filter matching less than %d time series; either increase -search.maxUniqueTimeseries or use more specific tag filters",
maxMetrics)
}
if err != nil {
return err
}
if len(metricIDsForTimeRange) > maxTimeRangeMetrics {
return fmt.Errorf("more than %d time series found on the time range %s; either increase -search.maxUniqueTimeseries or shrink the time range",
maxTimeRangeMetrics, tr.String())
}
minMetricIDs = metricIDsForTimeRange
minTf = nil
return err
}
// Find intersection of minTf with other tfs.

View File

@@ -12,7 +12,7 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/fastcache"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
)
func TestMarshalUnmarshalTSIDs(t *testing.T) {
@@ -57,10 +57,10 @@ func TestMarshalUnmarshalTSIDs(t *testing.T) {
}
func TestIndexDBOpenClose(t *testing.T) {
metricIDCache := fastcache.New(1234)
metricNameCache := fastcache.New(1234)
defer metricIDCache.Reset()
defer metricNameCache.Reset()
metricIDCache := workingsetcache.New(1234, time.Hour)
metricNameCache := workingsetcache.New(1234, time.Hour)
defer metricIDCache.Stop()
defer metricNameCache.Stop()
var hmCurr atomic.Value
hmCurr.Store(&hourMetricIDs{})
@@ -85,10 +85,10 @@ func TestIndexDB(t *testing.T) {
const metricGroups = 10
t.Run("serial", func(t *testing.T) {
metricIDCache := fastcache.New(1234)
metricNameCache := fastcache.New(1234)
defer metricIDCache.Reset()
defer metricNameCache.Reset()
metricIDCache := workingsetcache.New(1234, time.Hour)
metricNameCache := workingsetcache.New(1234, time.Hour)
defer metricIDCache.Stop()
defer metricNameCache.Stop()
var hmCurr atomic.Value
hmCurr.Store(&hourMetricIDs{})
@@ -142,10 +142,10 @@ func TestIndexDB(t *testing.T) {
})
t.Run("concurrent", func(t *testing.T) {
metricIDCache := fastcache.New(1234)
metricNameCache := fastcache.New(1234)
defer metricIDCache.Reset()
defer metricNameCache.Reset()
metricIDCache := workingsetcache.New(1234, time.Hour)
metricNameCache := workingsetcache.New(1234, time.Hour)
defer metricIDCache.Stop()
defer metricNameCache.Stop()
var hmCurr atomic.Value
hmCurr.Store(&hourMetricIDs{})

View File

@@ -3,20 +3,50 @@ package storage
import (
"fmt"
"os"
"regexp"
"strconv"
"sync/atomic"
"testing"
"time"
"github.com/VictoriaMetrics/fastcache"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
)
// BenchmarkRegexpFilterMatch measures regexp matching speed for an input
// containing the `foo-bar-baz` substring required by the regexp.
func BenchmarkRegexpFilterMatch(b *testing.B) {
	b.ReportAllocs()
	b.RunParallel(func(pb *testing.PB) {
		// The regexp and the input buffer are created per RunParallel body invocation,
		// so the buffer may be modified below without synchronization.
		filter := regexp.MustCompile(`.*foo-bar-baz.*`)
		input := []byte("fdsffd foo-bar-baz assd fdsfad dasf dsa")
		for pb.Next() {
			matched := filter.Match(input)
			if !matched {
				panic("BUG: regexp must match!")
			}
			// Change the first byte, so the input differs between iterations.
			input[0]++
		}
	})
}
// BenchmarkRegexpFilterMismatch measures regexp matching speed for an input
// that lacks the `foo-bar-baz` substring required by the regexp.
func BenchmarkRegexpFilterMismatch(b *testing.B) {
	b.ReportAllocs()
	b.RunParallel(func(pb *testing.PB) {
		// The regexp and the input buffer are created per RunParallel body invocation,
		// so the buffer may be modified below without synchronization.
		filter := regexp.MustCompile(`.*foo-bar-baz.*`)
		input := []byte("fdsffd foo-bar sfddsf assd nmn,mfdsdsakj")
		for pb.Next() {
			matched := filter.Match(input)
			if matched {
				panic("BUG: regexp mustn't match!")
			}
			// Change the first byte, so the input differs between iterations.
			input[0]++
		}
	})
}
func BenchmarkIndexDBAddTSIDs(b *testing.B) {
const recordsPerLoop = 1e3
metricIDCache := fastcache.New(1234)
metricNameCache := fastcache.New(1234)
defer metricIDCache.Reset()
defer metricNameCache.Reset()
metricIDCache := workingsetcache.New(1234, time.Hour)
metricNameCache := workingsetcache.New(1234, time.Hour)
defer metricIDCache.Stop()
defer metricNameCache.Stop()
var hmCurr atomic.Value
hmCurr.Store(&hourMetricIDs{})
@@ -78,86 +108,11 @@ func benchmarkIndexDBAddTSIDs(db *indexDB, tsid *TSID, mn *MetricName, startOffs
}
}
func BenchmarkIndexDBSearchTSIDs(b *testing.B) {
metricIDCache := fastcache.New(1234)
metricNameCache := fastcache.New(1234)
defer metricIDCache.Reset()
defer metricNameCache.Reset()
var hmCurr atomic.Value
hmCurr.Store(&hourMetricIDs{})
var hmPrev atomic.Value
hmPrev.Store(&hourMetricIDs{})
const dbName = "bench-index-db-search-tsids"
db, err := openIndexDB(dbName, metricIDCache, metricNameCache, &hmCurr, &hmPrev)
if err != nil {
b.Fatalf("cannot open indexDB: %s", err)
}
defer func() {
db.MustClose()
if err := os.RemoveAll(dbName); err != nil {
b.Fatalf("cannot remove indexDB: %s", err)
}
}()
const recordsCount = 1e5
// Fill the db with recordsCount records.
var mn MetricName
mn.MetricGroup = []byte("rps")
for i := 0; i < 2; i++ {
key := fmt.Sprintf("key_%d", i)
value := fmt.Sprintf("value_%d", i)
mn.AddTag(key, value)
}
var tsid TSID
var metricName []byte
is := db.getIndexSearch()
defer db.putIndexSearch(is)
for i := 0; i < recordsCount; i++ {
mn.sortTags()
metricName = mn.Marshal(metricName[:0])
if err := is.GetOrCreateTSIDByName(&tsid, metricName); err != nil {
b.Fatalf("cannot insert record: %s", err)
}
}
b.SetBytes(1)
b.ReportAllocs()
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
tags := []Tag{
{[]byte("key_0"), []byte("value_0")},
{[]byte("key_1"), []byte("value_1")},
}
var tfs TagFilters
tfss := []*TagFilters{&tfs}
i := 0
for pb.Next() {
tfs.Reset()
for j := range tags {
if err := tfs.Add(tags[j].Key, tags[j].Value, false, false); err != nil {
panic(fmt.Errorf("BUG: unexpected error: %s", err))
}
}
tsids, err := db.searchTSIDs(tfss, TimeRange{}, 1e5)
if err != nil {
panic(fmt.Errorf("unexpected error in search for tfs=%s: %s", &tfs, err))
}
if len(tsids) == 0 && i < recordsCount {
panic(fmt.Errorf("zero tsids found for tfs=%s", &tfs))
}
i++
}
})
}
func BenchmarkIndexDBGetTSIDs(b *testing.B) {
metricIDCache := fastcache.New(1234)
metricNameCache := fastcache.New(1234)
defer metricIDCache.Reset()
defer metricNameCache.Reset()
metricIDCache := workingsetcache.New(1234, time.Hour)
metricNameCache := workingsetcache.New(1234, time.Hour)
defer metricIDCache.Stop()
defer metricNameCache.Stop()
var hmCurr atomic.Value
hmCurr.Store(&hourMetricIDs{})

View File

@@ -9,6 +9,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
)
@@ -366,17 +367,8 @@ func (mn *MetricName) Unmarshal(src []byte) error {
}
}
// Verify no identical tag keys.
if len(mn.Tags) > 0 {
prevKey := mn.Tags[0].Key
for i := range mn.Tags[1:] {
t := &mn.Tags[1+i]
if bytes.Equal(t.Key, prevKey) {
return fmt.Errorf("found duplicate key %q", prevKey)
}
prevKey = t.Key
}
}
// There is no need to check for identical tag keys here,
// since they must be handled in MetricName.Marshal inside marshalTags.
return nil
}
@@ -392,9 +384,18 @@ const maxLabelNameLen = 256
const maxLabelValueLen = 16 * 1024
// The maximum number of labels per each timeseries.
var maxLabelsPerTimeseries = 30
// SetMaxLabelsPerTimeseries sets the limit on the number of labels
// per each time series.
//
// Superflouos lables are dropped.
const maxLabelsPerTimeseries = 30
// Superfluous labels are dropped.
func SetMaxLabelsPerTimeseries(maxLabels int) {
// A non-positive limit is treated as a programming error and is reported via logger.Panicf.
if maxLabels <= 0 {
logger.Panicf("BUG: maxLabels must be positive; got %d", maxLabels)
}
// Overwrite the package-level limit.
// NOTE(review): this is a plain, unsynchronized write - presumably the function
// is expected to be called once during startup; confirm against callers.
maxLabelsPerTimeseries = maxLabels
}
// MarshalMetricNameRaw marshals labels to dst and returns the result.
//
@@ -574,8 +575,15 @@ func (ts *canonicalTagsSort) Swap(i, j int) {
}
func marshalTags(dst []byte, tags []Tag) []byte {
var prevKey []byte
for i := range tags {
dst = tags[i].Marshal(dst)
t := &tags[i]
if string(prevKey) == string(t.Key) {
// Skip duplicate keys, since they aren't allowed in Prometheus data model.
continue
}
prevKey = t.Key
dst = t.Marshal(dst)
}
return dst
}

View File

@@ -34,6 +34,32 @@ func testMetricNameSortTags(t *testing.T, tags, expectedTags []string) {
}
}
// TestMetricNameMarshalDuplicateKeys verifies that a MetricName with repeated tag keys
// is unmarshaled with a single tag per key after a sortTags + Marshal round trip.
func TestMetricNameMarshalDuplicateKeys(t *testing.T) {
	// The expected result holds one tag per key.
	var want MetricName
	want.MetricGroup = []byte("xxx")
	want.AddTag("duplicate", "tag")
	want.AddTag("foo", "bar")
	want.AddTag("tt", "xx")

	// The source contains the "duplicate" key three times.
	var src MetricName
	src.MetricGroup = []byte("xxx")
	src.AddTag("foo", "bar")
	src.AddTag("duplicate", "tag")
	src.AddTag("duplicate", "tag")
	src.AddTag("tt", "xx")
	src.AddTag("duplicate", "tag2")
	src.sortTags()

	marshaled := src.Marshal(nil)
	var got MetricName
	err := got.Unmarshal(marshaled)
	if err != nil {
		t.Fatalf("cannot unmarshal mn %s: %s", &src, err)
	}
	if equal := reflect.DeepEqual(&want, &got); !equal {
		t.Fatalf("unexpected mn unmarshaled;\ngot\n%+v\nwant\n%+v", &got, &want)
	}
}
func TestMetricNameMarshalUnmarshal(t *testing.T) {
for i := 0; i < 10; i++ {
for tagsCount := 0; tagsCount < 10; tagsCount++ {

View File

@@ -144,7 +144,7 @@ func (p *part) MustClose() {
p.valuesFile.MustClose()
p.indexFile.MustClose()
isBig := p.ph.RowsCount > maxRowsPerSmallPart
isBig := p.ph.RowsCount > maxRowsPerSmallPart()
p.ibCache.Reset(isBig)
}

View File

@@ -51,6 +51,7 @@ func (ps *partSearch) reset() {
ps.p = nil
ps.tsids = ps.tsids[:0]
ps.tsidIdx = 0
ps.fetchData = true
ps.metaindex = nil
ps.ibCache = nil
ps.bhs = nil

View File

@@ -19,23 +19,18 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"golang.org/x/sys/unix"
)
// The maximum number of rows in a small part.
//
// Small part merges cannot be interrupted during server stop, so this value
// must be small enough to complete a merge
// of `maxRowsPerSmallPart * defaultPartsToMerge` rows in a reasonable amount
// of time (up to a a minute).
//
// Additionally, this number limits the maximum size of small parts storage.
// Production simultation shows that the required size of the storage
// may be estimated as:
//
// maxRowsPerSmallPart * 2 * defaultPartsToMerge * mergeWorkers
//
const maxRowsPerSmallPart = 300e6
// maxRowsPerSmallPart returns the maximum number of rows in a small part.
//
// Small parts are cached in the OS page cache, so the limit is derived
// from the remaining free RAM reported by memory.Remaining.
func maxRowsPerSmallPart() uint64 {
	remaining := memory.Remaining()
	if remaining > 0 {
		// Split the remaining memory across defaultPartsToMerge parts.
		return uint64(remaining) / defaultPartsToMerge
	}
	// Fall back to a fixed limit when no remaining memory is reported.
	return 100e6
}
// The maximum number of rows per big part.
//
@@ -813,8 +808,8 @@ func (pt *partition) partsMerger(mergerFunc func(isFinal bool) error) error {
}
}
func (pt *partition) maxOutPartRows() uint64 {
freeSpace := mustGetFreeDiskSpace(pt.bigPartsPath)
func maxRowsByPath(path string) uint64 {
freeSpace := mustGetFreeDiskSpace(path)
// Calculate the maximum number of rows in the output merge part
// by dividing the freeSpace by the number of concurrent
@@ -822,7 +817,11 @@ func (pt *partition) maxOutPartRows() uint64 {
// This assumes each row is compressed into 1 byte. Production
// simulation shows that each row usually occupies up to 0.5 bytes,
// so this is quite safe assumption.
return freeSpace / uint64(mergeWorkers)
maxRows := freeSpace / uint64(mergeWorkers)
if maxRows > maxRowsPerBigPart {
maxRows = maxRowsPerBigPart
}
return maxRows
}
func mustGetFreeDiskSpace(path string) uint64 {
@@ -838,18 +837,7 @@ func mustGetFreeDiskSpace(path string) uint64 {
// Slow path.
// Determine the amount of free space on bigPartsPath.
d, err := os.Open(path)
if err != nil {
logger.Panicf("FATAL: cannot determine free disk space on %q: %s", path, err)
}
defer fs.MustClose(d)
fd := d.Fd()
var stat unix.Statfs_t
if err := unix.Fstatfs(int(fd), &stat); err != nil {
logger.Panicf("FATAL: cannot determine free disk space on %q: %s", path, err)
}
e.freeSpace = stat.Bavail * uint64(stat.Bsize)
e.freeSpace = fs.MustGetFreeSpace(path)
e.updateTime = time.Now()
freeSpaceMap[path] = e
return e.freeSpace
@@ -866,10 +854,7 @@ type freeSpaceEntry struct {
}
func (pt *partition) mergeBigParts(isFinal bool) error {
maxRows := pt.maxOutPartRows()
if maxRows > maxRowsPerBigPart {
maxRows = maxRowsPerBigPart
}
maxRows := maxRowsByPath(pt.bigPartsPath)
pt.partsLock.Lock()
pws := getPartsToMerge(pt.bigParts, maxRows, isFinal)
@@ -888,7 +873,15 @@ func (pt *partition) mergeBigParts(isFinal bool) error {
}
func (pt *partition) mergeSmallParts(isFinal bool) error {
maxRows := uint64(maxRowsPerSmallPart * defaultPartsToMerge)
maxRows := maxRowsByPath(pt.smallPartsPath)
if maxRows > maxRowsPerSmallPart() {
// The output part may go to big part,
// so make sure it has enough space.
maxBigPartRows := maxRowsByPath(pt.bigPartsPath)
if maxRows > maxBigPartRows {
maxRows = maxBigPartRows
}
}
pt.partsLock.Lock()
pws := getPartsToMerge(pt.smallParts, maxRows, isFinal)
@@ -951,7 +944,7 @@ func (pt *partition) mergeParts(pws []*partWrapper, stopCh <-chan struct{}) erro
for _, pw := range pws {
outRowsCount += pw.p.ph.RowsCount
}
isBigPart := outRowsCount > maxRowsPerSmallPart
isBigPart := outRowsCount > maxRowsPerSmallPart()
nocache := isBigPart
// Prepare BlockStreamWriter for destination part.
@@ -1008,7 +1001,7 @@ func (pt *partition) mergeParts(pws []*partWrapper, stopCh <-chan struct{}) erro
}
fmt.Fprintf(&bb, "%s -> %s\n", tmpPartPath, dstPartPath)
txnPath := fmt.Sprintf("%s/txn/%016X", ptPath, mergeIdx)
if err := fs.WriteFile(txnPath, bb.B); err != nil {
if err := fs.WriteFileAtomically(txnPath, bb.B); err != nil {
return fmt.Errorf("cannot create transaction file %q: %s", txnPath, err)
}
@@ -1367,7 +1360,12 @@ func runTransactions(txnLock *sync.RWMutex, pathPrefix1, pathPrefix2, path strin
})
for _, fi := range fis {
txnPath := txnDir + "/" + fi.Name()
fn := fi.Name()
if fs.IsTemporaryFileName(fn) {
// Skip temporary files, which could be left after unclean shutdown.
continue
}
txnPath := txnDir + "/" + fn
if err := runTransaction(txnLock, pathPrefix1, pathPrefix2, txnPath); err != nil {
return fmt.Errorf("cannot run transaction from %q: %s", txnPath, err)
}

View File

@@ -6,11 +6,8 @@ import (
"testing"
)
func TestPartitionMaxOutPartRows(t *testing.T) {
pt := &partition{
bigPartsPath: ".",
}
n := pt.maxOutPartRows()
func TestPartitionMaxRowsByPath(t *testing.T) {
n := maxRowsByPath(".")
if n < 1e3 {
t.Fatalf("too small number of rows can be created in the current directory: %d", n)
}

View File

@@ -20,8 +20,8 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
"github.com/VictoriaMetrics/fastcache"
"golang.org/x/sys/unix"
)
const maxRetentionMonths = 12 * 100
@@ -40,16 +40,16 @@ type Storage struct {
tb *table
// tsidCache is MetricName -> TSID cache.
tsidCache *fastcache.Cache
tsidCache *workingsetcache.Cache
// metricIDCache is MetricID -> TSID cache.
metricIDCache *fastcache.Cache
metricIDCache *workingsetcache.Cache
// metricNameCache is MetricID -> MetricName cache.
metricNameCache *fastcache.Cache
metricNameCache *workingsetcache.Cache
// dateMetricIDCache is (Date, MetricID) cache.
dateMetricIDCache *fastcache.Cache
dateMetricIDCache *workingsetcache.Cache
// Fast cache for MetricID values occurred during the current hour.
currHourMetricIDs atomic.Value
@@ -68,6 +68,10 @@ type Storage struct {
tooSmallTimestampRows uint64
tooBigTimestampRows uint64
addRowsConcurrencyLimitReached uint64
addRowsConcurrencyLimitTimeout uint64
addRowsConcurrencyDroppedRows uint64
}
// OpenStorage opens storage on the given path with the given number of retention months.
@@ -99,13 +103,10 @@ func OpenStorage(path string, retentionMonths int) (*Storage, error) {
return nil, fmt.Errorf("cannot create %q: %s", snapshotsPath, err)
}
flockFile := path + "/flock.lock"
flockF, err := os.Create(flockFile)
// Protect from concurrent opens.
flockF, err := fs.CreateFlockFile(path)
if err != nil {
return nil, fmt.Errorf("cannot create lock file %q: %s", flockFile, err)
}
if err := unix.Flock(int(flockF.Fd()), unix.LOCK_EX|unix.LOCK_NB); err != nil {
return nil, fmt.Errorf("cannot acquire lock on file %q: %s", flockFile, err)
return nil, err
}
s.flockF = flockF
@@ -277,6 +278,12 @@ type Metrics struct {
TooSmallTimestampRows uint64
TooBigTimestampRows uint64
AddRowsConcurrencyLimitReached uint64
AddRowsConcurrencyLimitTimeout uint64
AddRowsConcurrencyDroppedRows uint64
AddRowsConcurrencyCapacity uint64
AddRowsConcurrencyCurrent uint64
TSIDCacheSize uint64
TSIDCacheSizeBytes uint64
TSIDCacheRequests uint64
@@ -317,6 +324,12 @@ func (s *Storage) UpdateMetrics(m *Metrics) {
m.TooSmallTimestampRows += atomic.LoadUint64(&s.tooSmallTimestampRows)
m.TooBigTimestampRows += atomic.LoadUint64(&s.tooBigTimestampRows)
m.AddRowsConcurrencyLimitReached += atomic.LoadUint64(&s.addRowsConcurrencyLimitReached)
m.AddRowsConcurrencyLimitTimeout += atomic.LoadUint64(&s.addRowsConcurrencyLimitTimeout)
m.AddRowsConcurrencyDroppedRows += atomic.LoadUint64(&s.addRowsConcurrencyDroppedRows)
m.AddRowsConcurrencyCapacity = uint64(cap(addRowsConcurrencyCh))
m.AddRowsConcurrencyCurrent = uint64(len(addRowsConcurrencyCh))
var cs fastcache.Stats
s.tsidCache.UpdateStats(&cs)
m.TSIDCacheSize += cs.EntriesCount
@@ -448,10 +461,10 @@ func (s *Storage) MustClose() {
s.idb().MustClose()
// Save caches.
s.mustSaveCache(s.tsidCache, "MetricName->TSID", "metricName_tsid")
s.mustSaveCache(s.metricIDCache, "MetricID->TSID", "metricID_tsid")
s.mustSaveCache(s.metricNameCache, "MetricID->MetricName", "metricID_metricName")
s.mustSaveCache(s.dateMetricIDCache, "Date->MetricID", "date_metricID")
s.mustSaveAndStopCache(s.tsidCache, "MetricName->TSID", "metricName_tsid")
s.mustSaveAndStopCache(s.metricIDCache, "MetricID->TSID", "metricID_tsid")
s.mustSaveAndStopCache(s.metricNameCache, "MetricID->MetricName", "metricID_metricName")
s.mustSaveAndStopCache(s.dateMetricIDCache, "Date->MetricID", "date_metricID")
hmCurr := s.currHourMetricIDs.Load().(*hourMetricIDs)
s.mustSaveHourMetricIDs(hmCurr, "curr_hour_metric_ids")
@@ -530,11 +543,11 @@ func (s *Storage) mustSaveHourMetricIDs(hm *hourMetricIDs, name string) {
logger.Infof("saved %s to %q in %s; entriesCount: %d; sizeBytes: %d", name, path, time.Since(startTime), len(hm.m), len(dst))
}
func (s *Storage) mustLoadCache(info, name string, sizeBytes int) *fastcache.Cache {
func (s *Storage) mustLoadCache(info, name string, sizeBytes int) *workingsetcache.Cache {
path := s.cachePath + "/" + name
logger.Infof("loading %s cache from %q...", info, path)
startTime := time.Now()
c := fastcache.LoadFromFileOrNew(path, sizeBytes)
c := workingsetcache.Load(path, sizeBytes, time.Hour)
var cs fastcache.Stats
c.UpdateStats(&cs)
logger.Infof("loaded %s cache from %q in %s; entriesCount: %d; sizeBytes: %d",
@@ -542,17 +555,16 @@ func (s *Storage) mustLoadCache(info, name string, sizeBytes int) *fastcache.Cac
return c
}
func (s *Storage) mustSaveCache(c *fastcache.Cache, info, name string) {
gomaxprocs := runtime.GOMAXPROCS(-1)
func (s *Storage) mustSaveAndStopCache(c *workingsetcache.Cache, info, name string) {
path := s.cachePath + "/" + name
logger.Infof("saving %s cache to %q...", info, path)
startTime := time.Now()
if err := c.SaveToFileConcurrent(path, gomaxprocs); err != nil {
if err := c.Save(path); err != nil {
logger.Panicf("FATAL: cannot save %s cache to %q: %s", info, path, err)
}
var cs fastcache.Stats
c.UpdateStats(&cs)
c.Reset()
c.Stop()
logger.Infof("saved %s cache to %q in %s; entriesCount: %d; sizeBytes: %d",
info, path, time.Since(startTime), cs.EntriesCount, cs.BytesSize)
}
@@ -722,15 +734,24 @@ func (s *Storage) AddRows(mrs []MetricRow, precisionBits uint8) error {
// Limit the number of concurrent goroutines that may add rows to the storage.
// This should prevent out-of-memory errors and CPU thrashing when too many
// goroutines call AddRows.
t := timerpool.Get(addRowsTimeout)
select {
case addRowsConcurrencyCh <- struct{}{}:
timerpool.Put(t)
defer func() { <-addRowsConcurrencyCh }()
case <-t.C:
timerpool.Put(t)
return fmt.Errorf("Cannot add %d rows to storage in %s, since it is overloaded with %d concurrent writers. Add more CPUs or reduce load",
len(mrs), addRowsTimeout, cap(addRowsConcurrencyCh))
default:
// Sleep for a while until giving up
atomic.AddUint64(&s.addRowsConcurrencyLimitReached, 1)
t := timerpool.Get(addRowsTimeout)
select {
case addRowsConcurrencyCh <- struct{}{}:
timerpool.Put(t)
defer func() { <-addRowsConcurrencyCh }()
case <-t.C:
timerpool.Put(t)
atomic.AddUint64(&s.addRowsConcurrencyLimitTimeout, 1)
atomic.AddUint64(&s.addRowsConcurrencyDroppedRows, uint64(len(mrs)))
return fmt.Errorf("Cannot add %d rows to storage in %s, since it is overloaded with %d concurrent writers. Add more CPUs or reduce load",
len(mrs), addRowsTimeout, cap(addRowsConcurrencyCh))
}
}
// Add rows to the storage.
@@ -748,7 +769,9 @@ var (
)
func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]rawRow, error) {
var errors []error
// Return only the last error, since it has no sense in returning all errors.
var lastError error
var is *indexSearch
var mn *MetricName
var kb *bytesutil.ByteBuffer
@@ -771,11 +794,13 @@ func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]ra
}
if mr.Timestamp < minTimestamp {
// Skip rows with too small timestamps outside the retention.
lastError = fmt.Errorf("cannot insert row with too small timestamp %d outside the retention; minimum allowed timestamp is %d", mr.Timestamp, minTimestamp)
atomic.AddUint64(&s.tooSmallTimestampRows, 1)
continue
}
if mr.Timestamp > maxTimestamp {
// Skip rows with too big timestamps significantly exceeding the current time.
lastError = fmt.Errorf("cannot insert row with too big timestamp %d exceeding the current time; maximum allowd timestamp is %d", mr.Timestamp, maxTimestamp)
atomic.AddUint64(&s.tooBigTimestampRows, 1)
continue
}
@@ -805,8 +830,7 @@ func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]ra
// Do not stop adding rows on error - just skip invalid row.
// This guarantees that invalid rows don't prevent
// from adding valid rows into the storage.
err = fmt.Errorf("cannot unmarshal MetricNameRaw %q: %s", mr.MetricNameRaw, err)
errors = append(errors, err)
lastError = fmt.Errorf("cannot unmarshal MetricNameRaw %q: %s", mr.MetricNameRaw, err)
j--
continue
}
@@ -816,8 +840,7 @@ func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]ra
// Do not stop adding rows on error - just skip invalid row.
// This guarantees that invalid rows don't prevent
// from adding valid rows into the storage.
err = fmt.Errorf("cannot obtain TSID for MetricName %q: %s", kb.B, err)
errors = append(errors, err)
lastError = fmt.Errorf("cannot obtain TSID for MetricName %q: %s", kb.B, err)
j--
continue
}
@@ -831,18 +854,16 @@ func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]ra
rows = rows[:rowsLen+j]
if err := s.tb.AddRows(rows); err != nil {
err = fmt.Errorf("cannot add rows to table: %s", err)
errors = append(errors, err)
lastError = fmt.Errorf("cannot add rows to table: %s", err)
}
errors = s.updateDateMetricIDCache(rows, errors)
if len(errors) > 0 {
// Return only the first error, since it has no sense in returning all errors.
return rows, fmt.Errorf("errors occurred during rows addition: %s", errors[0])
lastError = s.updateDateMetricIDCache(rows, lastError)
if lastError != nil {
return rows, fmt.Errorf("errors occurred during rows addition: %s", lastError)
}
return rows, nil
}
func (s *Storage) updateDateMetricIDCache(rows []rawRow, errors []error) []error {
func (s *Storage) updateDateMetricIDCache(rows []rawRow, lastError error) error {
var date uint64
var hour uint64
var prevTimestamp int64
@@ -884,11 +905,11 @@ func (s *Storage) updateDateMetricIDCache(rows []rawRow, errors []error) []error
// by concurrent goroutines.
s.dateMetricIDCache.Set(keyBuf, nil)
if err := idb.storeDateMetricID(date, metricID); err != nil {
errors = append(errors, err)
lastError = err
continue
}
}
return errors
return lastError
}
func (s *Storage) updateCurrHourMetricIDs() {
@@ -950,7 +971,7 @@ func (s *Storage) putTSIDToCache(tsid *TSID, metricName []byte) {
s.tsidCache.Set(metricName, buf)
}
func openIndexDBTables(path string, metricIDCache, metricNameCache *fastcache.Cache, currHourMetricIDs, prevHourMetricIDs *atomic.Value) (curr, prev *indexDB, err error) {
func openIndexDBTables(path string, metricIDCache, metricNameCache *workingsetcache.Cache, currHourMetricIDs, prevHourMetricIDs *atomic.Value) (curr, prev *indexDB, err error) {
if err := fs.MkdirAllIfNotExist(path); err != nil {
return nil, nil, fmt.Errorf("cannot create directory %q: %s", path, err)
}

View File

@@ -349,7 +349,8 @@ func testStorageRandTimestamps(s *Storage) error {
mrs = append(mrs, mr)
}
if err := s.AddRows(mrs, defaultPrecisionBits); err != nil {
if !strings.Contains(err.Error(), "too big timestamp") {
errStr := err.Error()
if !strings.Contains(errStr, "too big timestamp") && !strings.Contains(errStr, "too small timestamp") {
return fmt.Errorf("unexpected error when adding mrs: %s", err)
}
}

View File

@@ -10,7 +10,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"golang.org/x/sys/unix"
)
// table represents a single table with time series data.
@@ -84,13 +83,10 @@ func openTable(path string, retentionMonths int, getDeletedMetricIDs func() map[
return nil, fmt.Errorf("cannot create directory for table %q: %s", path, err)
}
flockFile := path + "/flock.lock"
flockF, err := os.Create(flockFile)
// Protect from concurrent opens.
flockF, err := fs.CreateFlockFile(path)
if err != nil {
return nil, fmt.Errorf("cannot create lock file %q: %s", flockFile, err)
}
if err := unix.Flock(int(flockF.Fd()), unix.LOCK_EX|unix.LOCK_NB); err != nil {
return nil, fmt.Errorf("cannot acquire lock on file %q: %s", flockFile, err)
return nil, err
}
// Create directories for small and big partitions if they don't exist yet.

View File

@@ -592,15 +592,18 @@ func extractRegexpPrefix(b []byte) ([]byte, []byte) {
if re == emptyRegexp {
return nil, nil
}
if re.Op == syntax.OpLiteral {
if re.Op == syntax.OpLiteral && re.Flags&syntax.FoldCase == 0 {
return []byte(string(re.Rune)), nil
}
var prefix []byte
if re.Op == syntax.OpConcat && re.Sub[0].Op == syntax.OpLiteral {
prefix = []byte(string(re.Sub[0].Rune))
re.Sub = re.Sub[1:]
if len(re.Sub) == 0 {
return nil, nil
if re.Op == syntax.OpConcat {
sub0 := re.Sub[0]
if sub0.Op == syntax.OpLiteral && sub0.Flags&syntax.FoldCase == 0 {
prefix = []byte(string(sub0.Rune))
re.Sub = re.Sub[1:]
if len(re.Sub) == 0 {
return nil, nil
}
}
}
if _, err := syntax.Compile(re); err != nil {

View File

@@ -5,6 +5,21 @@ import (
"testing"
)
// TestExtractRegexpPrefix checks the prefix and the suffix returned
// by extractRegexpPrefix for a couple of simple expressions.
func TestExtractRegexpPrefix(t *testing.T) {
	check := func(expr string, wantPrefix, wantSuffix string) {
		t.Helper()
		gotPrefix, gotSuffix := extractRegexpPrefix([]byte(expr))
		if wantPrefix != string(gotPrefix) {
			t.Fatalf("unexpected prefix for %q; got %q; want %q", expr, gotPrefix, wantPrefix)
		}
		if wantSuffix != string(gotSuffix) {
			t.Fatalf("unexpected suffix for %q; got %q; want %q", expr, gotSuffix, wantSuffix)
		}
	}

	// An empty expression yields neither a prefix nor a suffix.
	check("", "", "")
	// A plain literal is returned entirely as the prefix.
	check("foobar", "foobar", "")
}
func TestGetRegexpFromCache(t *testing.T) {
f := func(s string, orValuesExpected, expectedMatches, expectedMismatches []string) {
t.Helper()
@@ -397,67 +412,73 @@ func TestGetOrValues(t *testing.T) {
}
func TestGetRegexpPrefix(t *testing.T) {
testGetRegexpPrefix(t, "", "", "")
testGetRegexpPrefix(t, "^", "", "")
testGetRegexpPrefix(t, "$", "", "")
testGetRegexpPrefix(t, "^()$", "", "")
testGetRegexpPrefix(t, "^(?:)$", "", "")
testGetRegexpPrefix(t, "foobar", "foobar", "")
testGetRegexpPrefix(t, "foo$|^foobar", "foo", "(?:(?:)|bar)")
testGetRegexpPrefix(t, "^(foo$|^foobar)$", "foo", "(?:(?:)|bar)")
testGetRegexpPrefix(t, "foobar|foobaz", "fooba", "[rz]")
testGetRegexpPrefix(t, "(fo|(zar|bazz)|x)", "", "fo|zar|bazz|x")
testGetRegexpPrefix(t, "(тестЧЧ|тест)", "тест", "(?:ЧЧ|(?:))")
testGetRegexpPrefix(t, "foo(bar|baz|bana)", "fooba", "(?:[rz]|na)")
testGetRegexpPrefix(t, "^foobar|foobaz", "fooba", "[rz]")
testGetRegexpPrefix(t, "^foobar|^foobaz$", "fooba", "[rz]")
testGetRegexpPrefix(t, "foobar|foobaz", "fooba", "[rz]")
testGetRegexpPrefix(t, "(?:^foobar|^foobaz)aa.*", "fooba", "[rz]aa(?-s:.)*")
testGetRegexpPrefix(t, "foo[bar]+", "foo", "[a-br]+")
testGetRegexpPrefix(t, "foo[a-z]+", "foo", "[a-z]+")
testGetRegexpPrefix(t, "foo[bar]*", "foo", "[a-br]*")
testGetRegexpPrefix(t, "foo[a-z]*", "foo", "[a-z]*")
testGetRegexpPrefix(t, "foo[x]+", "foo", "x+")
testGetRegexpPrefix(t, "foo[^x]+", "foo", "[^x]+")
testGetRegexpPrefix(t, "foo[x]*", "foo", "x*")
testGetRegexpPrefix(t, "foo[^x]*", "foo", "[^x]*")
testGetRegexpPrefix(t, "foo[x]*bar", "foo", "x*bar")
testGetRegexpPrefix(t, "fo\\Bo[x]*bar?", "fo", "\\Box*bar?")
f := func(t *testing.T, s, expectedPrefix, expectedSuffix string) {
t.Helper()
prefix, suffix := getRegexpPrefix([]byte(s))
if string(prefix) != expectedPrefix {
t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix)
}
if string(suffix) != expectedSuffix {
t.Fatalf("unexpected suffix for s=%q; got %q; want %q", s, suffix, expectedSuffix)
}
// Get the prefix from cache.
prefix, suffix = getRegexpPrefix([]byte(s))
if string(prefix) != expectedPrefix {
t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix)
}
if string(suffix) != expectedSuffix {
t.Fatalf("unexpected suffix for s=%q; got %q; want %q", s, suffix, expectedSuffix)
}
}
f(t, "", "", "")
f(t, "^", "", "")
f(t, "$", "", "")
f(t, "^()$", "", "")
f(t, "^(?:)$", "", "")
f(t, "foobar", "foobar", "")
f(t, "foo$|^foobar", "foo", "(?:(?:)|bar)")
f(t, "^(foo$|^foobar)$", "foo", "(?:(?:)|bar)")
f(t, "foobar|foobaz", "fooba", "[rz]")
f(t, "(fo|(zar|bazz)|x)", "", "fo|zar|bazz|x")
f(t, "(тестЧЧ|тест)", "тест", "(?:ЧЧ|(?:))")
f(t, "foo(bar|baz|bana)", "fooba", "(?:[rz]|na)")
f(t, "^foobar|foobaz", "fooba", "[rz]")
f(t, "^foobar|^foobaz$", "fooba", "[rz]")
f(t, "foobar|foobaz", "fooba", "[rz]")
f(t, "(?:^foobar|^foobaz)aa.*", "fooba", "[rz]aa(?-s:.)*")
f(t, "foo[bar]+", "foo", "[a-br]+")
f(t, "foo[a-z]+", "foo", "[a-z]+")
f(t, "foo[bar]*", "foo", "[a-br]*")
f(t, "foo[a-z]*", "foo", "[a-z]*")
f(t, "foo[x]+", "foo", "x+")
f(t, "foo[^x]+", "foo", "[^x]+")
f(t, "foo[x]*", "foo", "x*")
f(t, "foo[^x]*", "foo", "[^x]*")
f(t, "foo[x]*bar", "foo", "x*bar")
f(t, "fo\\Bo[x]*bar?", "fo", "\\Box*bar?")
f(t, "foo.+bar", "foo", "(?-s:.)+bar")
f(t, "a(b|c.*).+", "a", "(?:b|c(?-s:.)*)(?-s:.)+")
f(t, "ab|ac", "a", "[b-c]")
f(t, "(?i)xyz", "", "(?i:XYZ)")
f(t, "(?i)up.+x", "", "(?i:UP)(?-s:.)+(?i:X)")
f(t, "(?smi)xy.*z$", "", "(?i:XY)(?s:.)*(?i:Z)(?m:$)")
// test invalid regexps
testGetRegexpPrefix(t, "a(", "a(", "")
testGetRegexpPrefix(t, "a[", "a[", "")
testGetRegexpPrefix(t, "a[]", "a[]", "")
testGetRegexpPrefix(t, "a{", "a{", "")
testGetRegexpPrefix(t, "a{}", "a{}", "")
testGetRegexpPrefix(t, "invalid(regexp", "invalid(regexp", "")
f(t, "a(", "a(", "")
f(t, "a[", "a[", "")
f(t, "a[]", "a[]", "")
f(t, "a{", "a{", "")
f(t, "a{}", "a{}", "")
f(t, "invalid(regexp", "invalid(regexp", "")
// The transformed regexp mustn't match aba
testGetRegexpPrefix(t, "a?(^ba|c)", "", "a?(?:\\Aba|c)")
f(t, "a?(^ba|c)", "", "a?(?:\\Aba|c)")
// The transformed regexp mustn't match barx
testGetRegexpPrefix(t, "(foo|bar$)x*", "", "(?:foo|bar(?-m:$))x*")
}
func testGetRegexpPrefix(t *testing.T, s, expectedPrefix, expectedSuffix string) {
t.Helper()
prefix, suffix := getRegexpPrefix([]byte(s))
if string(prefix) != expectedPrefix {
t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix)
}
if string(suffix) != expectedSuffix {
t.Fatalf("unexpected suffix for s=%q; got %q; want %q", s, suffix, expectedSuffix)
}
// Get the prefix from cache.
prefix, suffix = getRegexpPrefix([]byte(s))
if string(prefix) != expectedPrefix {
t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix)
}
if string(suffix) != expectedSuffix {
t.Fatalf("unexpected suffix for s=%q; got %q; want %q", s, suffix, expectedSuffix)
}
f(t, "(foo|bar$)x*", "", "(?:foo|bar(?-m:$))x*")
}
func TestTagFiltersAddEmpty(t *testing.T) {

View File

@@ -0,0 +1,255 @@
package workingsetcache
import (
"runtime"
"sync"
"sync/atomic"
"time"
"github.com/VictoriaMetrics/fastcache"
)
// Cache is a cache for working set entries.
//
// The cache evicts inactive entries after the given expireDuration.
// Recently accessed entries survive expireDuration.
//
// Compared to fastcache, this cache minimizes the required RAM size
// to values smaller than maxBytes.
type Cache struct {
	// The fields accessed with 64-bit sync/atomic operations go first.
	// On 32-bit platforms only the first word of an allocated struct
	// is guaranteed to be 64-bit aligned, so placing these fields after
	// other fields would make their alignment depend on the sizes
	// of the preceding fields.

	// misses is the number of lookups that found the key
	// neither in curr nor in prev.
	misses uint64

	// skipPrev indicates whether to use only curr and skip prev.
	//
	// This flag is set if curr is filled for more than 50% space.
	// In this case using prev would result in RAM waste,
	// it is better to use only curr cache with doubled size.
	skipPrev uint64

	// curr and prev hold *fastcache.Cache values.
	curr atomic.Value
	prev atomic.Value

	// mu serializes access to curr, prev and skipPrev
	// in expirationWorker and cacheSizeWatcher.
	mu sync.Mutex

	// wg is used for waiting until the background goroutines exit;
	// stopCh is closed in order to notify them that they must exit.
	wg     sync.WaitGroup
	stopCh chan struct{}
}
// Load loads the cache from filePath with fastcache.LoadFromFileOrNew,
// limits its size to maxBytes and evicts inactive entries after expireDuration.
//
// Stop must be called on the returned cache when it is no longer needed.
func Load(filePath string, maxBytes int, expireDuration time.Duration) *Cache {
	// curr and prev caches share maxBytes, so each of them gets a half.
	halfBytes := maxBytes / 2
	loaded := fastcache.LoadFromFileOrNew(filePath, halfBytes)
	return newWorkingSetCache(loaded, halfBytes, expireDuration)
}
// New creates an empty cache limited by maxBytes, which evicts
// inactive entries after expireDuration.
//
// Stop must be called on the returned cache when it is no longer needed.
func New(maxBytes int, expireDuration time.Duration) *Cache {
	// curr and prev caches share maxBytes, so each of them gets a half.
	halfBytes := maxBytes / 2
	return newWorkingSetCache(fastcache.New(halfBytes), halfBytes, expireDuration)
}
// newWorkingSetCache wraps curr into a Cache and starts its background workers.
//
// prev starts as a small 1024-byte fastcache. maxBytes and expireDuration
// are handed over to expirationWorker and cacheSizeWatcher, which run
// in their own goroutines until Stop is called.
func newWorkingSetCache(curr *fastcache.Cache, maxBytes int, expireDuration time.Duration) *Cache {
	c := &Cache{
		stopCh: make(chan struct{}),
	}
	c.curr.Store(curr)
	c.prev.Store(fastcache.New(1024))

	// Both workers are registered in wg, so Stop can wait for them.
	c.wg.Add(2)
	go func() {
		defer c.wg.Done()
		c.expirationWorker(maxBytes, expireDuration)
	}()
	go func() {
		defer c.wg.Done()
		c.cacheSizeWatcher(maxBytes)
	}()
	return c
}
// expirationWorker periodically rotates the caches, so entries which
// weren't accessed during expireDuration are evicted.
//
// Every expireDuration/2 the prev cache is reset, curr becomes prev
// and a fresh curr with maxBytes capacity is created. Get and GetBig copy
// entries found in prev back to curr, so recently accessed entries survive
// the rotation.
//
// The rotation is performed only while prev is in use, i.e. while skipPrev
// isn't set. When skipPrev is set, readers consult only curr, so moving curr
// to prev would hide all the cached entries from them and would replace
// the double-sized curr with a maxBytes-sized one.
//
// The worker exits when stopCh is closed.
func (c *Cache) expirationWorker(maxBytes int, expireDuration time.Duration) {
	t := time.NewTicker(expireDuration / 2)
	defer t.Stop()
	for {
		select {
		case <-c.stopCh:
			return
		case <-t.C:
		}

		c.mu.Lock()
		if atomic.LoadUint64(&c.skipPrev) == 0 {
			// Expire prev cache and create fresh curr cache.
			// Do not reuse prev cache, since it can have too big capacity.
			prev := c.prev.Load().(*fastcache.Cache)
			prev.Reset()
			curr := c.curr.Load().(*fastcache.Cache)
			c.prev.Store(curr)
			curr = fastcache.New(maxBytes)
			c.curr.Store(curr)
		}
		c.mu.Unlock()
	}
}
// cacheSizeWatcher checks the size of curr once per minute. As soon as
// the size reaches a half of maxBytes, it resets both caches, replaces curr
// with a new cache of 2*maxBytes capacity and sets skipPrev, so only curr
// is used from that moment.
//
// The watcher exits after the switch or when stopCh is closed.
func (c *Cache) cacheSizeWatcher(maxBytes int) {
	t := time.NewTicker(time.Minute)
	// Release the ticker on every exit path, including the return after the switch.
	defer t.Stop()
	for {
		select {
		case <-c.stopCh:
			return
		case <-t.C:
		}

		var cs fastcache.Stats
		curr := c.curr.Load().(*fastcache.Cache)
		curr.UpdateStats(&cs)
		if cs.BytesSize < uint64(maxBytes)/2 {
			continue
		}

		// curr cache size exceeds 50% of its capacity. It is better
		// to double the size of curr cache and stop using prev cache,
		// since this will result in higher summary cache capacity.
		c.mu.Lock()
		// Re-read curr under mu, which guards replacement of curr and prev,
		// so the cache being reset is the one that is actually installed.
		curr = c.curr.Load().(*fastcache.Cache)
		curr.Reset()
		prev := c.prev.Load().(*fastcache.Cache)
		prev.Reset()
		curr = fastcache.New(maxBytes * 2)
		c.curr.Store(curr)
		atomic.StoreUint64(&c.skipPrev, 1)
		c.mu.Unlock()
		return
	}
}
// Save saves the cache to filePath.
//
// Only the curr cache is written. The current GOMAXPROCS value is passed
// as the concurrency to SaveToFileConcurrent.
func (c *Cache) Save(filePath string) error {
	workers := runtime.GOMAXPROCS(-1)
	return c.curr.Load().(*fastcache.Cache).SaveToFileConcurrent(filePath, workers)
}
// Stop stops the background workers of the cache and resets it.
//
// The cache cannot be used after the Stop call.
func (c *Cache) Stop() {
	// Notify expirationWorker and cacheSizeWatcher that they must exit
	// and wait until both of them return.
	close(c.stopCh)
	c.wg.Wait()

	// Nobody rotates the caches anymore, so drop their contents.
	c.Reset()
}
// Reset resets the cache.
//
// Both prev and curr caches are reset and the misses counter is zeroed.
func (c *Cache) Reset() {
	prev := c.prev.Load().(*fastcache.Cache)
	prev.Reset()
	curr := c.curr.Load().(*fastcache.Cache)
	curr.Reset()

	// misses is incremented with atomic.AddUint64 in Get and GetBig and is read
	// with atomic.LoadUint64 in UpdateStats, so it must be cleared atomically
	// as well. A plain store would be a data race.
	atomic.StoreUint64(&c.misses, 0)
}
// UpdateStats updates fcs with cache stats.
//
// The stats of curr are always added to fcs. While prev is in use
// (skipPrev isn't set), the misses reported by curr are replaced with
// the cache-wide misses counter, and the entries count and the bytes size
// of prev are added on top.
func (c *Cache) UpdateStats(fcs *fastcache.Stats) {
	// Remember the incoming value, since curr.UpdateStats adds its own misses to it.
	missesBefore := fcs.Misses
	c.curr.Load().(*fastcache.Cache).UpdateStats(fcs)
	if atomic.LoadUint64(&c.skipPrev) != 0 {
		return
	}

	// A miss in curr may still be a hit in prev, so count only the lookups
	// that failed in both caches.
	fcs.Misses = missesBefore + atomic.LoadUint64(&c.misses)

	var prevStats fastcache.Stats
	c.prev.Load().(*fastcache.Cache).UpdateStats(&prevStats)
	fcs.EntriesCount += prevStats.EntriesCount
	fcs.BytesSize += prevStats.BytesSize
}
// Get appends the found value for the given key to dst and returns the result.
//
// The key is looked up in curr first and then, unless skipPrev is set, in prev.
// A value found in prev is copied to curr. A lookup is treated as successful
// only if it made the result longer than dst.
func (c *Cache) Get(dst, key []byte) []byte {
	dstLen := len(dst)
	curr := c.curr.Load().(*fastcache.Cache)
	out := curr.Get(dst, key)
	if len(out) > dstLen || atomic.LoadUint64(&c.skipPrev) != 0 {
		// Either the entry is found in the current cache,
		// or the previous cache mustn't be consulted.
		return out
	}

	// Fall back to the previous cache.
	out = c.prev.Load().(*fastcache.Cache).Get(dst, key)
	if len(out) > dstLen {
		// Move the found entry to the current cache.
		curr.Set(key, out[dstLen:])
		return out
	}

	// The entry is missing in both caches.
	atomic.AddUint64(&c.misses, 1)
	return out
}
// Has verifies whether the cache contains the given key.
//
// The key is checked in curr and then, unless skipPrev is set, in prev.
func (c *Cache) Has(key []byte) bool {
	if c.curr.Load().(*fastcache.Cache).Has(key) {
		return true
	}
	if atomic.LoadUint64(&c.skipPrev) == 0 {
		return c.prev.Load().(*fastcache.Cache).Has(key)
	}
	return false
}
// Set sets the given value for the given key.
//
// The entry is always stored in the curr cache.
func (c *Cache) Set(key, value []byte) {
	c.curr.Load().(*fastcache.Cache).Set(key, value)
}
// GetBig appends the found value for the given key to dst and returns the result.
//
// It mirrors Get, but uses GetBig / SetBig of the underlying caches:
// the key is looked up in curr first and then, unless skipPrev is set, in prev.
// A value found in prev is copied to curr.
func (c *Cache) GetBig(dst, key []byte) []byte {
	dstLen := len(dst)
	curr := c.curr.Load().(*fastcache.Cache)
	out := curr.GetBig(dst, key)
	if len(out) > dstLen || atomic.LoadUint64(&c.skipPrev) != 0 {
		// Either the entry is found in the current cache,
		// or the previous cache mustn't be consulted.
		return out
	}

	// Fall back to the previous cache.
	out = c.prev.Load().(*fastcache.Cache).GetBig(dst, key)
	if len(out) > dstLen {
		// Move the found entry to the current cache.
		curr.SetBig(key, out[dstLen:])
		return out
	}

	// The entry is missing in both caches.
	atomic.AddUint64(&c.misses, 1)
	return out
}
// SetBig sets the given value for the given key.
//
// The entry is always stored in the curr cache via its SetBig method.
func (c *Cache) SetBig(key, value []byte) {
	c.curr.Load().(*fastcache.Cache).SetBig(key, value)
}

View File

@@ -67,7 +67,11 @@ func writeProcessMetrics(w io.Writer) {
// It is expensive obtaining `process_open_fds` when big number of file descriptors is opened,
// don't do it here.
fmt.Fprintf(w, "process_cpu_seconds_total %g\n", float64(p.Utime+p.Stime)/userHZ)
utime := float64(p.Utime) / userHZ
stime := float64(p.Stime) / userHZ
fmt.Fprintf(w, "process_cpu_seconds_system_total %g\n", stime)
fmt.Fprintf(w, "process_cpu_seconds_total %g\n", utime+stime)
fmt.Fprintf(w, "process_cpu_seconds_user_total %g\n", utime)
fmt.Fprintf(w, "process_major_pagefaults_total %d\n", p.Majflt)
fmt.Fprintf(w, "process_minor_pagefaults_total %d\n", p.Minflt)
fmt.Fprintf(w, "process_num_threads %d\n", p.NumThreads)

View File

@@ -1,4 +1,5 @@
Copyright (c) 2012 The Go Authors. All rights reserved.
Copyright (c) 2019 Klaus Post. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -243,7 +243,7 @@ func (s *Scratch) buildDtable() error {
nBits := s.actualTableLog - byte(highBits(uint32(nextState)))
s.decTable[u].nbBits = nBits
newState := (nextState << nBits) - tableSize
if newState > tableSize {
if newState >= tableSize {
return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize)
}
if newState == uint16(u) && nBits == 0 {
@@ -281,8 +281,12 @@ func (s *Scratch) decompress() error {
tmp[off+2] = s1.nextFast()
tmp[off+3] = s2.nextFast()
off += 4
// When off is 0, we have overflowed and should write.
if off == 0 {
s.Out = append(s.Out, tmp...)
if len(s.Out) >= s.DecompressLimit {
return fmt.Errorf("output size (%d) > DecompressLimit (%d)", len(s.Out), s.DecompressLimit)
}
}
}
} else {
@@ -296,7 +300,7 @@ func (s *Scratch) decompress() error {
off += 4
if off == 0 {
s.Out = append(s.Out, tmp...)
off = 0
// When off is 0, we have overflowed and should write.
if len(s.Out) >= s.DecompressLimit {
return fmt.Errorf("output size (%d) > DecompressLimit (%d)", len(s.Out), s.DecompressLimit)
}

View File

@@ -247,9 +247,13 @@ func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) {
dstOut := s.Out
dstEvery := (dstSize + 3) / 4
const tlSize = 1 << tableLogMax
const tlMask = tlSize - 1
single := s.dt.single[:tlSize]
decode := func(br *bitReader) byte {
val := br.peekBitsFast(s.actualTableLog) /* note : actualTableLog >= 1 */
v := s.dt.single[val]
v := single[val&tlMask]
br.bitsRead += v.nBits
return v.byte
}
@@ -279,7 +283,7 @@ bigloop:
off += 2
if off == bufoff {
if bufoff > dstEvery {
return nil, errors.New("corruption detected: stream overrun")
return nil, errors.New("corruption detected: stream overrun 1")
}
copy(dstOut, tmp[:bufoff])
copy(dstOut[dstEvery:], tmp[bufoff:bufoff*2])
@@ -288,15 +292,15 @@ bigloop:
off = 0
dstOut = dstOut[bufoff:]
// There must at least be 3 buffers left.
if len(dstOut) < dstEvery*3+3 {
return nil, errors.New("corruption detected: stream overrun")
if len(dstOut) < dstEvery*3 {
return nil, errors.New("corruption detected: stream overrun 2")
}
}
}
if off > 0 {
ioff := int(off)
if len(dstOut) < dstEvery*3+ioff {
return nil, errors.New("corruption detected: stream overrun")
return nil, errors.New("corruption detected: stream overrun 3")
}
copy(dstOut, tmp[:off])
copy(dstOut[dstEvery:dstEvery+ioff], tmp[bufoff:bufoff*2])
@@ -311,7 +315,7 @@ bigloop:
for !br.finished() {
br.fill()
if offset >= len(dstOut) {
return nil, errors.New("corruption detected: stream overrun")
return nil, errors.New("corruption detected: stream overrun 4")
}
dstOut[offset] = decode(br)
offset++

View File

@@ -34,7 +34,8 @@ For now, a high speed (fastest) and medium-fast (default) compressor has been im
The "Fastest" compression ratio is roughly equivalent to zstd level 1.
The "Default" compression ratio is roughly equivalent to zstd level 3 (default).
In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode. The compression ratio compared to stdlib is around level 3, but usually 3x as fast.
In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode.
The compression ratio compared to stdlib is around level 3, but usually 3x as fast.
Compared to cgo zstd, the speed is around level 3 (default), but compression slightly worse, between level 1&2.
@@ -217,7 +218,8 @@ silesia.tar zstd 3 211947520 66793301 1377 146.79
As part of the development process a *Snappy* -> *Zstandard* converter was also built.
This can convert a *framed* [Snappy Stream](https://godoc.org/github.com/golang/snappy#Writer) to a zstd stream. Note that a single block is not framed.
This can convert a *framed* [Snappy Stream](https://godoc.org/github.com/golang/snappy#Writer) to a zstd stream.
Note that a single block is not framed.
Conversion is done by converting the stream directly from Snappy without intermediate full decoding.
Therefore the compression ratio is much less than what can be done by a full decompression

View File

@@ -155,14 +155,17 @@ func (h *literalsHeader) setSize(regenLen int) {
}
// setSizes will set the size of a compressed literals section and the input length.
func (h *literalsHeader) setSizes(compLen, inLen int) {
func (h *literalsHeader) setSizes(compLen, inLen int, single bool) {
compBits, inBits := bits.Len32(uint32(compLen)), bits.Len32(uint32(inLen))
// Only retain 2 bits
const mask = 3
lh := uint64(*h & mask)
switch {
case compBits <= 10 && inBits <= 10:
lh |= (1 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
if !single {
lh |= 1 << 2
}
lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
if debug {
const mmask = (1 << 24) - 1
n := (lh >> 4) & mmask
@@ -175,8 +178,14 @@ func (h *literalsHeader) setSizes(compLen, inLen int) {
}
case compBits <= 14 && inBits <= 14:
lh |= (2 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (14 + 4)) | (4 << 60)
if single {
panic("single stream used with more than 10 bits length.")
}
case compBits <= 18 && inBits <= 18:
lh |= (3 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (18 + 4)) | (5 << 60)
if single {
panic("single stream used with more than 10 bits length.")
}
default:
panic("internal error: block too big")
}
@@ -307,12 +316,30 @@ func (b *blockEnc) encodeLits() error {
return nil
}
// TODO: Switch to 1X when less than x bytes.
out, reUsed, err := huff0.Compress4X(b.literals, b.litEnc)
// Bail out of compression is too little.
if len(out) > (len(b.literals) - len(b.literals)>>4) {
var (
out []byte
reUsed, single bool
err error
)
if len(b.literals) >= 1024 {
// Use 4 Streams.
out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
if len(out) > len(b.literals)-len(b.literals)>>4 {
// Bail out of compression is too little.
err = huff0.ErrIncompressible
}
} else if len(b.literals) > 32 {
// Use 1 stream
single = true
out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
if len(out) > len(b.literals)-len(b.literals)>>4 {
// Bail out of compression is too little.
err = huff0.ErrIncompressible
}
} else {
err = huff0.ErrIncompressible
}
switch err {
case huff0.ErrIncompressible:
if debug {
@@ -351,7 +378,7 @@ func (b *blockEnc) encodeLits() error {
lh.setType(literalsBlockCompressed)
}
// Set sizes
lh.setSizes(len(out), len(b.literals))
lh.setSizes(len(out), len(b.literals), single)
bh.setSize(uint32(len(out) + lh.size() + 1))
// Write block headers.
@@ -381,16 +408,23 @@ func (b *blockEnc) encode() error {
b.output = bh.appendTo(b.output)
var (
out []byte
reUsed bool
err error
out []byte
reUsed, single bool
err error
)
if len(b.literals) > 32 {
// TODO: Switch to 1X on small blocks.
if len(b.literals) >= 1024 {
// Use 4 Streams.
out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
if len(out) > len(b.literals)-len(b.literals)>>4 {
err = huff0.ErrIncompressible
}
} else if len(b.literals) > 32 {
// Use 1 stream
single = true
out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
if len(out) > len(b.literals)-len(b.literals)>>4 {
err = huff0.ErrIncompressible
}
} else {
err = huff0.ErrIncompressible
}
@@ -435,7 +469,7 @@ func (b *blockEnc) encode() error {
}
}
}
lh.setSizes(len(out), len(b.literals))
lh.setSizes(len(out), len(b.literals), single)
if debug {
printf("Compressed %d literals to %d bytes", len(b.literals), len(out))
println("Adding literal header:", lh)

View File

@@ -116,6 +116,9 @@ func (r *readerWrapper) readByte() (byte, error) {
}
func (r *readerWrapper) skipN(n int) error {
_, err := io.CopyN(ioutil.Discard, r.r, int64(n))
n2, err := io.CopyN(ioutil.Discard, r.r, int64(n))
if n2 != int64(n) {
err = io.ErrUnexpectedEOF
}
return err
}

View File

@@ -281,17 +281,17 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
}
d.decoders <- block
frame.rawInput = nil
frame.bBuf = nil
d.frames <- frame
}()
frame.bBuf = input
if cap(dst) == 0 {
// Allocate 1MB by default if nothing is provided.
dst = make([]byte, 0, 1<<20)
}
// Allocation here:
br := byteBuf(input)
for {
err := frame.reset(&br)
err := frame.reset(&frame.bBuf)
if err == io.EOF {
return dst, nil
}
@@ -313,7 +313,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
if err != nil {
return dst, err
}
if len(br) == 0 {
if len(frame.bBuf) == 0 {
break
}
}

View File

@@ -82,16 +82,11 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
stepSize++
}
// TEMPLATE
const kSearchStrength = 8
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
cv := load6432(src, s)
// nextHash is the hash at s
nextHashS := hash5(cv, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
// Relative offsets
offset1 := int32(blk.recentOffsets[0])
@@ -119,8 +114,8 @@ encodeLoop:
panic("offset0 was 0")
}
nextHashS = nextHashS & dFastShortTableMask
nextHashL = nextHashL & dFastLongTableMask
nextHashS := hash5(cv, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@@ -172,8 +167,6 @@ encodeLoop:
break encodeLoop
}
cv = load6432(src, s)
nextHashS = hash5(cv, dFastShortTableBits)
nextHashL = hash8(cv, dFastLongTableBits)
continue
}
const repOff2 = 1
@@ -221,8 +214,6 @@ encodeLoop:
break encodeLoop
}
cv = load6432(src, s)
nextHashS = hash5(cv, dFastShortTableBits)
nextHashL = hash8(cv, dFastLongTableBits)
// Swap offsets
offset1, offset2 = offset2, offset1
continue
@@ -296,8 +287,6 @@ encodeLoop:
break encodeLoop
}
cv = load6432(src, s)
nextHashS = hash5(cv, dFastShortTableBits)
nextHashL = hash8(cv, dFastLongTableBits)
}
// A 4-byte match has been found. Update recent offsets.
@@ -354,20 +343,18 @@ encodeLoop:
cv1 := load6432(src, index1)
te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
e.longTable[hash8(cv0, dFastLongTableBits)&dFastLongTableMask] = te0
e.longTable[hash8(cv1, dFastLongTableBits)&dFastLongTableMask] = te1
e.longTable[hash8(cv0, dFastLongTableBits)] = te0
e.longTable[hash8(cv1, dFastLongTableBits)] = te1
cv0 >>= 8
cv1 >>= 8
te0.offset++
te1.offset++
te0.val = uint32(cv0)
te1.val = uint32(cv1)
e.table[hash5(cv0, dFastShortTableBits)&dFastShortTableMask] = te0
e.table[hash5(cv1, dFastShortTableBits)&dFastShortTableMask] = te1
e.table[hash5(cv0, dFastShortTableBits)] = te0
e.table[hash5(cv1, dFastShortTableBits)] = te1
cv = load6432(src, s)
nextHashS = hash5(cv1>>8, dFastShortTableBits)
nextHashL = hash8(cv, dFastLongTableBits)
if !canRepeat {
continue
@@ -381,14 +368,17 @@ encodeLoop:
break
}
// Store this, since we have it.
nextHashS := hash5(cv1>>8, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
l := 4 + e.matchlen(s+4, o2+4, src)
// Store this, since we have it.
entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
e.longTable[nextHashL&dFastLongTableMask] = entry
e.table[nextHashS&dFastShortTableMask] = entry
e.longTable[nextHashL] = entry
e.table[nextHashS] = entry
seq.matchLen = uint32(l) - zstdMinMatch
seq.litLen = 0
@@ -408,8 +398,6 @@ encodeLoop:
break encodeLoop
}
cv = load6432(src, s)
nextHashS = hash5(cv, dFastShortTableBits)
nextHashL = hash8(cv, dFastLongTableBits)
}
}

View File

@@ -124,8 +124,6 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
cv := load6432(src, s)
// nextHash is the hash at s
nextHash := hash6(cv, hashLog)
// Relative offsets
offset1 := int32(blk.recentOffsets[0])
@@ -157,8 +155,8 @@ encodeLoop:
panic("offset0 was 0")
}
nextHash2 := hash6(cv>>8, hashLog) & tableMask
nextHash = nextHash & tableMask
nextHash := hash6(cv, hashLog)
nextHash2 := hash6(cv>>8, hashLog)
candidate := e.table[nextHash]
candidate2 := e.table[nextHash2]
repIndex := s - offset1 + 2
@@ -207,8 +205,6 @@ encodeLoop:
break encodeLoop
}
cv = load6432(src, s)
//nextHash = hashLen(cv, hashLog, mls)
nextHash = hash6(cv, hashLog)
continue
}
coffset0 := s - (candidate.offset - e.cur)
@@ -245,7 +241,6 @@ encodeLoop:
break encodeLoop
}
cv = load6432(src, s)
nextHash = hash6(cv, hashLog)
}
// A 4-byte match has been found. We'll later see if more than 4 bytes.
offset2 = offset1
@@ -292,15 +287,16 @@ encodeLoop:
break encodeLoop
}
cv = load6432(src, s)
nextHash = hash6(cv, hashLog)
// Check offset 2
if o2 := s - offset2; canRepeat && o2 > 0 && load3232(src, o2) == uint32(cv) {
if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
l := 4 + e.matchlen(s+4, o2+4, src)
// Store this, since we have it.
e.table[nextHash&tableMask] = tableEntry{offset: s + e.cur, val: uint32(cv)}
nextHash := hash6(cv, hashLog)
e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
seq.matchLen = uint32(l) - zstdMinMatch
seq.litLen = 0
// Since litlen is always 0, this is offset 1.
@@ -319,7 +315,6 @@ encodeLoop:
}
// Prepare next loop.
cv = load6432(src, s)
nextHash = hash6(cv, hashLog)
}
}

View File

@@ -257,7 +257,12 @@ func (e *Encoder) nextBlock(final bool) error {
}
s.wWg.Done()
}()
err := blk.encode()
err := errIncompressible
// If we got the exact same number of literals as input,
// assume the literals cannot be compressed.
if len(src) != len(blk.literals) || len(src) != e.o.blockSize {
err = blk.encode()
}
switch err {
case errIncompressible:
if debug {
@@ -444,7 +449,13 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
if len(src) == 0 {
blk.last = true
}
err := blk.encode()
err := errIncompressible
// If we got the exact same number of literals as input,
// assume the literals cannot be compressed.
if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize {
err = blk.encode()
}
switch err {
case errIncompressible:
if debug {

View File

@@ -39,6 +39,9 @@ type frameDec struct {
rawInput byteBuffer
// Byte buffer that can be reused for small input blocks.
bBuf byteBuf
// asyncRunning indicates whether the async routine processes input on 'decoding'.
asyncRunning bool
asyncRunningMu sync.Mutex

View File

@@ -184,29 +184,75 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
// decSymbol contains information about a state entry,
// Including the state offset base, the output symbol and
// the number of bits to read for the low part of the destination state.
type decSymbol struct {
newState uint16
addBits uint8 // Used for symbols until transformed.
nbBits uint8
baseline uint32
// decSymbol is a decoding-table entry packed into a single uint64.
// Using a composite uint64 is faster than a struct with separate members.
//
// Bit layout, least significant bit first:
//
//	bits  0-7   nbBits
//	bits  8-15  addBits
//	bits 16-31  newState
//	bits 32-63  baseline
type decSymbol uint64

// newDecSymbol packs the four fields into one decSymbol value.
func newDecSymbol(nbits, addBits uint8, newState uint16, baseline uint32) decSymbol {
	packed := uint64(nbits)
	packed |= uint64(addBits) << 8
	packed |= uint64(newState) << 16
	packed |= uint64(baseline) << 32
	return decSymbol(packed)
}

// nbBits returns the value stored in bits 0-7.
func (d decSymbol) nbBits() uint8 {
	return uint8(d & 0xff)
}

// addBits returns the value stored in bits 8-15.
func (d decSymbol) addBits() uint8 {
	return uint8((d >> 8) & 0xff)
}

// newState returns the value stored in bits 16-31.
func (d decSymbol) newState() uint16 {
	return uint16((d >> 16) & 0xffff)
}

// baseline returns the value stored in bits 32-63.
func (d decSymbol) baseline() uint32 {
	return uint32(uint64(d) >> 32)
}

// baselineInt returns the value stored in bits 32-63, converted to int.
func (d decSymbol) baselineInt() int {
	return int(uint64(d) >> 32)
}

// set overwrites every field of the symbol at once.
func (d *decSymbol) set(nbits, addBits uint8, newState uint16, baseline uint32) {
	*d = newDecSymbol(nbits, addBits, newState, baseline)
}

// setNBits replaces bits 0-7 and leaves the other fields untouched.
func (d *decSymbol) setNBits(nBits uint8) {
	kept := *d &^ 0xff
	*d = kept | decSymbol(nBits)
}

// setAddBits replaces bits 8-15 and leaves the other fields untouched.
func (d *decSymbol) setAddBits(addBits uint8) {
	kept := *d &^ (0xff << 8)
	*d = kept | decSymbol(addBits)<<8
}

// setNewState replaces bits 16-31 and leaves the other fields untouched.
func (d *decSymbol) setNewState(state uint16) {
	kept := *d &^ (0xffff << 16)
	*d = kept | decSymbol(state)<<16
}

// setBaseline replaces bits 32-63 and leaves the other fields untouched.
func (d *decSymbol) setBaseline(baseline uint32) {
	kept := *d &^ (0xffffffff << 32)
	*d = kept | decSymbol(baseline)<<32
}

// setExt replaces addBits (bits 8-15) and baseline (bits 32-63) in one step,
// keeping nbBits and newState as they were.
func (d *decSymbol) setExt(addBits uint8, baseline uint32) {
	kept := *d &^ (0xff<<8 | 0xffffffff<<32)
	*d = kept | decSymbol(addBits)<<8 | decSymbol(baseline)<<32
}
// decSymbolValue returns the transformed decSymbol for the given symbol.
func decSymbolValue(symb uint8, t []baseOffset) (decSymbol, error) {
if int(symb) >= len(t) {
return decSymbol{}, fmt.Errorf("rle symbol %d >= max %d", symb, len(t))
return 0, fmt.Errorf("rle symbol %d >= max %d", symb, len(t))
}
lu := t[symb]
return decSymbol{
addBits: lu.addBits,
baseline: lu.baseLine,
}, nil
return newDecSymbol(0, lu.addBits, 0, lu.baseLine), nil
}
// setRLE will set the decoder to RLE mode.
func (s *fseDecoder) setRLE(symbol decSymbol) {
s.actualTableLog = 0
s.maxBits = symbol.addBits
s.maxBits = symbol.addBits()
s.dt[0] = symbol
}
@@ -220,7 +266,7 @@ func (s *fseDecoder) buildDtable() error {
{
for i, v := range s.norm[:s.symbolLen] {
if v == -1 {
s.dt[highThreshold].addBits = uint8(i)
s.dt[highThreshold].setAddBits(uint8(i))
highThreshold--
symbolNext[i] = 1
} else {
@@ -235,7 +281,7 @@ func (s *fseDecoder) buildDtable() error {
position := uint32(0)
for ss, v := range s.norm[:s.symbolLen] {
for i := 0; i < int(v); i++ {
s.dt[position].addBits = uint8(ss)
s.dt[position].setAddBits(uint8(ss))
position = (position + step) & tableMask
for position > highThreshold {
// lowprob area
@@ -253,11 +299,11 @@ func (s *fseDecoder) buildDtable() error {
{
tableSize := uint16(1 << s.actualTableLog)
for u, v := range s.dt[:tableSize] {
symbol := v.addBits
symbol := v.addBits()
nextState := symbolNext[symbol]
symbolNext[symbol] = nextState + 1
nBits := s.actualTableLog - byte(highBits(uint32(nextState)))
s.dt[u&maxTableMask].nbBits = nBits
s.dt[u&maxTableMask].setNBits(nBits)
newState := (nextState << nBits) - tableSize
if newState > tableSize {
return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize)
@@ -266,7 +312,7 @@ func (s *fseDecoder) buildDtable() error {
// Seems weird that this is possible with nbits > 0.
return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u)
}
s.dt[u&maxTableMask].newState = newState
s.dt[u&maxTableMask].setNewState(newState)
}
}
return nil
@@ -279,25 +325,21 @@ func (s *fseDecoder) transform(t []baseOffset) error {
tableSize := uint16(1 << s.actualTableLog)
s.maxBits = 0
for i, v := range s.dt[:tableSize] {
if int(v.addBits) >= len(t) {
return fmt.Errorf("invalid decoding table entry %d, symbol %d >= max (%d)", i, v.addBits, len(t))
add := v.addBits()
if int(add) >= len(t) {
return fmt.Errorf("invalid decoding table entry %d, symbol %d >= max (%d)", i, v.addBits(), len(t))
}
lu := t[v.addBits]
lu := t[add]
if lu.addBits > s.maxBits {
s.maxBits = lu.addBits
}
s.dt[i&maxTableMask] = decSymbol{
newState: v.newState,
nbBits: v.nbBits,
addBits: lu.addBits,
baseline: lu.baseLine,
}
v.setExt(lu.addBits, lu.baseLine)
s.dt[i] = v
}
return nil
}
// fseState pairs a decoding table with the table entry that is currently
// selected in it.
type fseState struct {
// TODO: Check if *[1 << maxTablelog]decSymbol is faster.
// dt is the table that the next entry is looked up in.
dt []decSymbol
// state is the currently selected table entry.
state decSymbol
}
@@ -312,26 +354,31 @@ func (s *fseState) init(br *bitReader, tableLog uint8, dt []decSymbol) {
// next returns the current symbol and sets the next state.
// At least tablelog bits must be available in the bit reader.
func (s *fseState) next(br *bitReader) {
lowBits := uint16(br.getBits(s.state.nbBits))
s.state = s.dt[s.state.newState+lowBits]
lowBits := uint16(br.getBits(s.state.nbBits()))
s.state = s.dt[s.state.newState()+lowBits]
}
// finished returns true if all bits have been read from the bitstream
// and the next state would require reading bits from the input.
func (s *fseState) finished(br *bitReader) bool {
return br.finished() && s.state.nbBits > 0
return br.finished() && s.state.nbBits() > 0
}
// final returns the current state symbol without decoding the next.
func (s *fseState) final() (int, uint8) {
return int(s.state.baseline), s.state.addBits
return s.state.baselineInt(), s.state.addBits()
}
// final returns the symbol's baseline (as an int) together with its addBits
// value; nothing is decoded and the symbol itself is left unchanged.
func (d decSymbol) final() (int, uint8) {
	base, extra := d.baselineInt(), d.addBits()
	return base, extra
}
// nextFast returns the next symbol and sets the next state.
// This can only be used if no symbols are 0 bits.
// At least tablelog bits must be available in the bit reader.
func (s *fseState) nextFast(br *bitReader) (uint32, uint8) {
lowBits := uint16(br.getBitsFast(s.state.nbBits))
s.state = s.dt[s.state.newState+lowBits]
return s.state.baseline, s.state.addBits
lowBits := uint16(br.getBitsFast(s.state.nbBits()))
s.state = s.dt[s.state.newState()+lowBits]
return s.state.baseline(), s.state.addBits()
}

Some files were not shown because too many files have changed in this diff Show More