Compare commits

...

10 Commits

Author SHA1 Message Date
Aliaksandr Valialkin
be39414f9c deployment/docker: switch Go builder from go1.12.8 to go1.12.9 2019-08-18 22:07:58 +03:00
Aliaksandr Valialkin
e74fb23189 app/vmselect/promql: add scrape_interval(q[d]) function, which would return scrape interval for q over d 2019-08-18 21:08:26 +03:00
Aliaksandr Valialkin
582fdc059a app/vmselect/promql: hande comparisons with NaN similar to Prometheus
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/150
2019-08-18 00:25:50 +03:00
Aliaksandr Valialkin
1c108fc494 app/vmselect/promql: add lifetime(q[d]) function, which returns the lifetime of q over d in seconds.
This function is useful for determining time series lifetime.
`d` must exceed the expected lifetime of the time series, otherwise
the function would return values close to `d`.
2019-08-16 11:59:32 +03:00
Aliaksandr Valialkin
d6b5ed6d39 app/vmselect/promql: fix corner-case calculation for ideriv 2019-08-16 11:59:28 +03:00
Aliaksandr Valialkin
639b14e8ab app/vmselect/promql: properly handle corner cases for rollup functions 2019-08-15 23:29:59 +03:00
Aliaksandr Valialkin
483de1cc06 lib/workingsetcache: automatically detect when it is better to double cache capacity 2019-08-15 22:57:55 +03:00
Aliaksandr Valialkin
9e0896055d deployment/docker: switch Go builder from go1.12.7 to go1.12.8 2019-08-15 20:43:36 +03:00
Aliaksandr Valialkin
5bb61b8b38 vendor: update github.com/valyala/gozstd from v1.5.1 to v1.6.0 2019-08-15 12:56:42 +03:00
Aliaksandr Valialkin
75a58dee02 README.md: typo fix 2019-08-14 03:28:07 +03:00
24 changed files with 419 additions and 201 deletions

View File

@@ -527,7 +527,7 @@ kill -HUP `pidof prometheus`
If you have Prometheus HA pairs with replicas `r1` and `r2` in each pair, then configure each `r1`
to write data to `<victoriametrics-addr-1`, while each `r2` should write data to `victoriametrics-addr-2`.
to write data to `victoriametrics-addr-1`, while each `r2` should write data to `victoriametrics-addr-2`.
### Multiple retentions

View File

@@ -416,10 +416,25 @@ func binaryOpIfnot(left, right float64) float64 {
}
func binaryOpEq(left, right float64) bool {
// Special handling for nan == nan.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/150 .
if math.IsNaN(left) {
return math.IsNaN(right)
}
return left == right
}
func binaryOpNeq(left, right float64) bool {
// Special handling for comparison with nan.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/150 .
if math.IsNaN(left) {
return !math.IsNaN(right)
}
if math.IsNaN(right) {
return true
}
return left != right
}

View File

@@ -149,12 +149,6 @@ func scanString(s string) (string, error) {
}
func scanPositiveNumber(s string) (string, error) {
if strings.HasPrefix(s, "Inf") {
return "Inf", nil
}
if strings.HasPrefix(s, "NaN") {
return "NaN", nil
}
// Scan integer part. It may be empty if fractional part exists.
i := 0
for i < len(s) && isDecimalChar(s[i]) {
@@ -333,6 +327,14 @@ func scanTagFilterOpPrefix(s string) int {
return -1
}
func isInfOrNaN(s string) bool {
if len(s) != 3 {
return false
}
s = strings.ToLower(s)
return s == "inf" || s == "nan"
}
func isOffset(s string) bool {
s = strings.ToLower(s)
return s == "offset"
@@ -361,7 +363,7 @@ func isPositiveNumberPrefix(s string) bool {
// Check for .234 numbers
if s[0] != '.' || len(s) < 2 {
return strings.HasPrefix(s, "Inf") || strings.HasPrefix(s, "NaN")
return false
}
return isDecimalChar(s[1])
}

View File

@@ -373,7 +373,7 @@ func (p *parser) parseSingleExpr() (expr, error) {
}
func (p *parser) parseSingleExprWithoutRollupSuffix() (expr, error) {
if isPositiveNumberPrefix(p.lex.Token) {
if isPositiveNumberPrefix(p.lex.Token) || isInfOrNaN(p.lex.Token) {
return p.parsePositiveNumberExpr()
}
if isStringPrefix(p.lex.Token) {
@@ -417,7 +417,7 @@ func (p *parser) parseSingleExprWithoutRollupSuffix() (expr, error) {
}
func (p *parser) parsePositiveNumberExpr() (*numberExpr, error) {
if !isPositiveNumberPrefix(p.lex.Token) {
if !isPositiveNumberPrefix(p.lex.Token) && !isInfOrNaN(p.lex.Token) {
return nil, fmt.Errorf(`positiveNumberExpr: unexpected token %q; want "number"`, p.lex.Token)
}

View File

@@ -170,14 +170,34 @@ func TestParsePromQLSuccess(t *testing.T) {
another(`-.2`, `-0.2`)
another(`-.2E-2`, `-0.002`)
same(`NaN`)
another(`nan`, `NaN`)
another(`NAN`, `NaN`)
another(`nAN`, `NaN`)
another(`Inf`, `+Inf`)
another(`INF`, `+Inf`)
another(`inf`, `+Inf`)
another(`+Inf`, `+Inf`)
another(`-Inf`, `-Inf`)
another(`-inF`, `-Inf`)
// binaryOpExpr
another(`NaN + 2 *3 * Inf`, `NaN`)
another(`Inf - Inf`, `NaN`)
another(`Inf + Inf`, `+Inf`)
another(`nan == nan`, `NaN`)
another(`nan ==bool nan`, `1`)
another(`nan !=bool nan`, `0`)
another(`nan !=bool 2`, `1`)
another(`2 !=bool nan`, `1`)
another(`nan >bool nan`, `0`)
another(`nan <bool nan`, `0`)
another(`1 ==bool nan`, `0`)
another(`NaN !=bool 1`, `1`)
another(`inf >=bool 2`, `1`)
another(`-1 >bool -inf`, `1`)
another(`-1 <bool -inf`, `0`)
another(`nan + 2 *3 * inf`, `NaN`)
another(`INF - Inf`, `NaN`)
another(`Inf + inf`, `+Inf`)
another(`1/0`, `+Inf`)
another(`0/0`, `NaN`)
another(`-m`, `0 - m`)
same(`m + ignoring () n[5m]`)
another(`M + IGNORING () N[5m]`, `M + ignoring () N[5m]`)

View File

@@ -45,6 +45,8 @@ var rollupFuncs = map[string]newRollupFunc{
"distinct_over_time": newRollupFuncOneArg(rollupDistinct),
"integrate": newRollupFuncOneArg(rollupIntegrate),
"ideriv": newRollupFuncOneArg(rollupIderiv),
"lifetime": newRollupFuncOneArg(rollupLifetime),
"scrape_interval": newRollupFuncOneArg(rollupScrapeInterval),
"rollup": newRollupFuncOneArg(rollupFake),
"rollup_rate": newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
"rollup_deriv": newRollupFuncOneArg(rollupFake),
@@ -61,6 +63,8 @@ var rollupFuncsMayAdjustWindow = map[string]bool{
"deriv_fast": true,
"irate": true,
"rate": true,
"lifetime": true,
"scrape_interval": true,
}
var rollupFuncsRemoveCounterResets = map[string]bool{
@@ -615,10 +619,15 @@ func rollupDelta(rfa *rollupFuncArg) float64 {
if len(values) == 0 {
return nan
}
if len(values) == 1 {
// Assume that the previous non-existing value was 0.
return values[0]
}
prevValue = values[0]
values = values[1:]
}
if len(values) == 0 {
// Assume that the value didn't change on the given interval.
return 0
}
return values[len(values)-1] - prevValue
@@ -632,6 +641,7 @@ func rollupIdelta(rfa *rollupFuncArg) float64 {
if math.IsNaN(rfa.prevValue) {
return nan
}
// Assume that the value didn't change on the given interval.
return 0
}
lastValue := values[len(values)-1]
@@ -639,7 +649,8 @@ func rollupIdelta(rfa *rollupFuncArg) float64 {
if len(values) == 0 {
prevValue := rfa.prevValue
if math.IsNaN(prevValue) {
return 0
// Assume that the previous non-existing value was 0.
return lastValue
}
return lastValue - prevValue
}
@@ -661,7 +672,8 @@ func rollupDerivFast(rfa *rollupFuncArg) float64 {
prevValue := rfa.prevValue
prevTimestamp := rfa.prevTimestamp
if math.IsNaN(prevValue) {
if len(values) == 0 {
if len(values) < 2 {
// It is impossible to calculate derivative on 0 or 1 values.
return nan
}
prevValue = values[0]
@@ -670,6 +682,7 @@ func rollupDerivFast(rfa *rollupFuncArg) float64 {
timestamps = timestamps[1:]
}
if len(values) == 0 {
// Assume that the value didn't change on the given interval.
return 0
}
vEnd := values[len(values)-1]
@@ -684,11 +697,12 @@ func rollupIderiv(rfa *rollupFuncArg) float64 {
// before calling rollup funcs.
values := rfa.values
timestamps := rfa.timestamps
if len(values) == 0 {
if math.IsNaN(rfa.prevValue) {
if len(values) < 2 {
if len(values) == 0 || math.IsNaN(rfa.prevValue) {
// It is impossible to calculate derivative on 0 or 1 values.
return nan
}
return 0
return (values[0] - rfa.prevValue) / (float64(timestamps[0]-rfa.prevTimestamp) * 1e-3)
}
vEnd := values[len(values)-1]
tEnd := timestamps[len(timestamps)-1]
@@ -712,7 +726,37 @@ func rollupIderiv(rfa *rollupFuncArg) float64 {
}
dv := vEnd - vStart
dt := tEnd - tStart
return dv / (float64(dt) / 1000)
return dv / (float64(dt) * 1e-3)
}
func rollupLifetime(rfa *rollupFuncArg) float64 {
// Calculate the duration between the first and the last data points.
timestamps := rfa.timestamps
if math.IsNaN(rfa.prevValue) {
if len(timestamps) < 2 {
return nan
}
return float64(timestamps[len(timestamps)-1]-timestamps[0]) * 1e-3
}
if len(timestamps) == 0 {
return nan
}
return float64(timestamps[len(timestamps)-1]-rfa.prevTimestamp) * 1e-3
}
func rollupScrapeInterval(rfa *rollupFuncArg) float64 {
// Calculate the average interval between data points.
timestamps := rfa.timestamps
if math.IsNaN(rfa.prevValue) {
if len(timestamps) < 2 {
return nan
}
return float64(timestamps[len(timestamps)-1]-timestamps[0]) * 1e-3 / float64(len(timestamps)-1)
}
if len(timestamps) == 0 {
return nan
}
return (float64(timestamps[len(timestamps)-1]-rfa.prevTimestamp) * 1e-3) / float64(len(timestamps))
}
func rollupChanges(rfa *rollupFuncArg) float64 {

View File

@@ -45,8 +45,19 @@ func TestRollupIderivDuplicateTimestamps(t *testing.T) {
timestamps: []int64{100},
}
n = rollupIderiv(rfa)
if n != 0 {
t.Fatalf("unexpected value; got %v; want %v", n, 0)
if !math.IsNaN(n) {
t.Fatalf("unexpected value; got %v; want %v", n, nan)
}
rfa = &rollupFuncArg{
prevTimestamp: 90,
prevValue: 10,
values: []float64{15},
timestamps: []int64{100},
}
n = rollupIderiv(rfa)
if n != 500 {
t.Fatalf("unexpected value; got %v; want %v", n, 0.5)
}
rfa = &rollupFuncArg{
@@ -569,10 +580,66 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{0, 33, -87, 0}
valuesExpected := []float64{123, 33, -87, 0}
timestampsExpected := []int64{10, 50, 90, 130}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("lifetime", func(t *testing.T) {
rc := rollupConfig{
Func: rollupLifetime,
Start: 0,
End: 160,
Step: 40,
Window: 0,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 0.031, 0.044, 0.04, 0.01}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("lifetime", func(t *testing.T) {
rc := rollupConfig{
Func: rollupLifetime,
Start: 0,
End: 160,
Step: 40,
Window: 200,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 0.031, 0.075, 0.115, 0.125}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("scrape_interval", func(t *testing.T) {
rc := rollupConfig{
Func: rollupScrapeInterval,
Start: 0,
End: 160,
Step: 40,
Window: 0,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 0.010333333333333333, 0.011, 0.013333333333333334, 0.01}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("scrape_interval", func(t *testing.T) {
rc := rollupConfig{
Func: rollupScrapeInterval,
Start: 0,
End: 160,
Step: 40,
Window: 80,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 0.010333333333333333, 0.010714285714285714, 0.012, 0.0125}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("changes", func(t *testing.T) {
rc := rollupConfig{
Func: rollupChanges,

View File

@@ -1,5 +1,5 @@
DOCKER_NAMESPACE := victoriametrics
BUILDER_IMAGE := local/builder:go1.12.7
BUILDER_IMAGE := local/builder:go1.12.9
CERTS_IMAGE := local/certs:1.0.2
package-certs:

View File

@@ -1,2 +1,2 @@
FROM golang:1.12.7
FROM golang:1.12.9
STOPSIGNAL SIGINT

2
go.mod
View File

@@ -9,7 +9,7 @@ require (
github.com/klauspost/compress v1.7.5
github.com/spaolacci/murmur3 v1.1.0 // indirect
github.com/valyala/fastjson v1.4.1
github.com/valyala/gozstd v1.5.1
github.com/valyala/gozstd v1.6.0
github.com/valyala/histogram v1.0.1
github.com/valyala/quicktemplate v1.1.1
golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa

4
go.sum
View File

@@ -41,8 +41,8 @@ github.com/valyala/fastjson v1.4.1 h1:hrltpHpIpkaxll8QltMU8c3QZ5+qIiCL8yKqPFJI/y
github.com/valyala/fastjson v1.4.1/go.mod h1:nV6MsjxL2IMJQUoHDIrjEI7oLyeqK6aBD7EFWPsvP8o=
github.com/valyala/fastrand v1.0.0 h1:LUKT9aKer2dVQNUi3waewTbKV+7H17kvWFNKs2ObdkI=
github.com/valyala/fastrand v1.0.0/go.mod h1:HWqCzkrkg6QXT8V2EXWvXCoow7vLwOFN002oeRzjapQ=
github.com/valyala/gozstd v1.5.1 h1:ZLepItgu2g+B2CfVQy6KCV/as8lnJ7ef1KU6DPxQSS0=
github.com/valyala/gozstd v1.5.1/go.mod h1:oYOS+oJovjw9ewtrwEYb9+ybolEXd6pHyLMuAWN5zts=
github.com/valyala/gozstd v1.6.0 h1:34qKK75C6Dx9zof2JqUiunfJQ87Up6vTHXABWDyCH+g=
github.com/valyala/gozstd v1.6.0/go.mod h1:y5Ew47GLlP37EkTB+B4s7r6A5rdaeB7ftbl9zoYiIPQ=
github.com/valyala/histogram v1.0.1 h1:FzA7n2Tz/wKRMejgu3PV1vw3htAklTjjuoI6z3d4KDg=
github.com/valyala/histogram v1.0.1/go.mod h1:lQy0xA4wUz2+IUnf97SivorsJIp8FxsnRd6x25q7Mto=
github.com/valyala/quicktemplate v1.1.1 h1:C58y/wN0FMTi2PR0n3onltemfFabany53j7M6SDDB8k=

View File

@@ -1,7 +1,6 @@
package workingsetcache
import (
"flag"
"runtime"
"sync"
"sync/atomic"
@@ -10,9 +9,6 @@ import (
"github.com/VictoriaMetrics/fastcache"
)
var oldBehavior = flag.Bool("cache.oldBehavior", false, "Whether to use old behaviour for caches. Old behavior can give better resuts "+
"for low-RAM systems serving big number of time series. Systems with enough RAM would consume more RAM when `-cache.oldBehavior` is enabled")
// Cache is a cache for working set entries.
//
// The cache evicts inactive entries after the given expireDuration.
@@ -24,6 +20,17 @@ type Cache struct {
curr atomic.Value
prev atomic.Value
// skipPrev indicates whether to use only curr and skip prev.
//
// This flag is set if curr is filled for more than 50% space.
// In this case using prev would result in RAM waste,
// it is better to use only curr cache with doubled size.
skipPrev uint64
// mu serializes access to curr, prev and skipPrev
// in expirationWorker and cacheSizeWatcher.
mu sync.Mutex
wg sync.WaitGroup
stopCh chan struct{}
@@ -35,10 +42,8 @@ type Cache struct {
//
// Stop must be called on the returned cache when it is no longer needed.
func Load(filePath string, maxBytes int, expireDuration time.Duration) *Cache {
if !*oldBehavior {
// Split maxBytes between curr and prev caches.
maxBytes /= 2
}
// Split maxBytes between curr and prev caches.
maxBytes /= 2
curr := fastcache.LoadFromFileOrNew(filePath, maxBytes)
return newWorkingSetCache(curr, maxBytes, expireDuration)
}
@@ -48,10 +53,8 @@ func Load(filePath string, maxBytes int, expireDuration time.Duration) *Cache {
//
// Stop must be called on the returned cache when it is no longer needed.
func New(maxBytes int, expireDuration time.Duration) *Cache {
if !*oldBehavior {
// Split maxBytes between curr and prev caches.
maxBytes /= 2
}
// Split maxBytes between curr and prev caches.
maxBytes /= 2
curr := fastcache.New(maxBytes)
return newWorkingSetCache(curr, maxBytes, expireDuration)
}
@@ -62,21 +65,33 @@ func newWorkingSetCache(curr *fastcache.Cache, maxBytes int, expireDuration time
c.curr.Store(curr)
c.prev.Store(prev)
c.stopCh = make(chan struct{})
c.wg.Add(1)
go func() {
defer c.wg.Done()
t := time.NewTicker(expireDuration / 2)
for {
select {
case <-c.stopCh:
return
case <-t.C:
}
if *oldBehavior {
// Keep the curr cache for old behavior.
continue
}
c.expirationWorker(maxBytes, expireDuration)
}()
c.wg.Add(1)
go func() {
defer c.wg.Done()
c.cacheSizeWatcher(maxBytes)
}()
return &c
}
func (c *Cache) expirationWorker(maxBytes int, expireDuration time.Duration) {
t := time.NewTicker(expireDuration / 2)
for {
select {
case <-c.stopCh:
t.Stop()
return
case <-t.C:
}
c.mu.Lock()
if atomic.LoadUint64(&c.skipPrev) != 0 {
// Expire prev cache and create fresh curr cache.
// Do not reuse prev cache, since it can have too big capacity.
prev := c.prev.Load().(*fastcache.Cache)
prev.Reset()
@@ -85,8 +100,39 @@ func newWorkingSetCache(curr *fastcache.Cache, maxBytes int, expireDuration time
curr = fastcache.New(maxBytes)
c.curr.Store(curr)
}
}()
return &c
c.mu.Unlock()
}
}
func (c *Cache) cacheSizeWatcher(maxBytes int) {
t := time.NewTicker(time.Minute)
for {
select {
case <-c.stopCh:
t.Stop()
return
case <-t.C:
}
var cs fastcache.Stats
curr := c.curr.Load().(*fastcache.Cache)
curr.UpdateStats(&cs)
if cs.BytesSize < uint64(maxBytes)/2 {
continue
}
// curr cache size exceeds 50% of its capacity. It is better
// to double the size of curr cache and stop using prev cache,
// since this will result in higher summary cache capacity.
c.mu.Lock()
curr.Reset()
prev := c.prev.Load().(*fastcache.Cache)
prev.Reset()
curr = fastcache.New(maxBytes * 2)
c.curr.Store(curr)
atomic.StoreUint64(&c.skipPrev, 1)
c.mu.Unlock()
return
}
}
// Save safes the cache to filePath.
@@ -121,11 +167,11 @@ func (c *Cache) UpdateStats(fcs *fastcache.Stats) {
curr := c.curr.Load().(*fastcache.Cache)
fcsOrig := *fcs
curr.UpdateStats(fcs)
if *oldBehavior {
if atomic.LoadUint64(&c.skipPrev) != 0 {
return
}
fcs.Misses = fcsOrig.Misses + atomic.LoadUint64(&c.misses)
fcs.Misses = fcsOrig.Misses + atomic.LoadUint64(&c.misses)
fcsOrig.Reset()
prev := c.prev.Load().(*fastcache.Cache)
prev.UpdateStats(&fcsOrig)
@@ -141,7 +187,7 @@ func (c *Cache) Get(dst, key []byte) []byte {
// Fast path - the entry is found in the current cache.
return result
}
if *oldBehavior {
if atomic.LoadUint64(&c.skipPrev) != 0 {
return result
}
@@ -164,7 +210,7 @@ func (c *Cache) Has(key []byte) bool {
if curr.Has(key) {
return true
}
if *oldBehavior {
if atomic.LoadUint64(&c.skipPrev) != 0 {
return false
}
prev := c.prev.Load().(*fastcache.Cache)
@@ -185,7 +231,7 @@ func (c *Cache) GetBig(dst, key []byte) []byte {
// Fast path - the entry is found in the current cache.
return result
}
if *oldBehavior {
if atomic.LoadUint64(&c.skipPrev) != 0 {
return result
}

View File

@@ -3,6 +3,7 @@ GOARCH ?= $(shell go env GOARCH)
GOOS_GOARCH := $(GOOS)_$(GOARCH)
GOOS_GOARCH_NATIVE := $(shell go env GOHOSTOS)_$(shell go env GOHOSTARCH)
LIBZSTD_NAME := libzstd_$(GOOS_GOARCH).a
ZSTD_VERSION ?= master
.PHONY: libzstd.a
@@ -10,15 +11,15 @@ libzstd.a: $(LIBZSTD_NAME)
$(LIBZSTD_NAME):
ifeq ($(GOOS_GOARCH),$(GOOS_GOARCH_NATIVE))
cd zstd/lib && ZSTD_LEGACY_SUPPORT=0 $(MAKE) clean libzstd.a
cd zstd/lib && ZSTD_LEGACY_SUPPORT=0 MOREFLAGS=$(MOREFLAGS) $(MAKE) clean libzstd.a
mv zstd/lib/libzstd.a $(LIBZSTD_NAME)
else
ifeq ($(GOOS_GOARCH),linux_arm)
cd zstd/lib && CC=arm-linux-gnueabi-gcc ZSTD_LEGACY_SUPPORT=0 $(MAKE) clean libzstd.a
cd zstd/lib && CC=arm-linux-gnueabi-gcc ZSTD_LEGACY_SUPPORT=0 MOREFLAGS=$(MOREFLAGS) $(MAKE) clean libzstd.a
mv zstd/lib/libzstd.a libzstd_linux_arm.a
endif
ifeq ($(GOOS_GOARCH),linux_arm64)
cd zstd/lib && CC=aarch64-linux-gnu-gcc ZSTD_LEGACY_SUPPORT=0 $(MAKE) clean libzstd.a
cd zstd/lib && CC=aarch64-linux-gnu-gcc ZSTD_LEGACY_SUPPORT=0 MOREFLAGS=$(MOREFLAGS) $(MAKE) clean libzstd.a
mv zstd/lib/libzstd.a libzstd_linux_arm64.a
endif
endif
@@ -29,7 +30,7 @@ clean:
update-zstd:
rm -rf zstd-tmp
git clone --branch master --depth 1 https://github.com/Facebook/zstd zstd-tmp
git clone --branch $(ZSTD_VERSION) --depth 1 https://github.com/Facebook/zstd zstd-tmp
rm -rf zstd-tmp/.git
rm -rf zstd
mv zstd-tmp zstd

View File

@@ -73,5 +73,8 @@ and [Reader](https://godoc.org/github.com/valyala/gozstd#Reader) for stream deco
* Q: _I don't trust `libzstd*.a` binary files from the repo or these files dont't work on my OS/ARCH. How to rebuild them?_
A: Just run `make clean libzstd.a` if your OS/ARCH is supported.
* Q: _How do I specify custom build flags when recompiling `libzstd*.a`?_
A: You can specify MOREFLAGS=... variable when running `make` like this: `MOREFLAGS=-fPIC make clean libzstd.a`.
* Q: _Why the repo contains `libzstd*.a` binary files?_
A: This simplifies package installation to usual `go get` without additional steps for building the `libzstd*.a`

View File

@@ -1,6 +1,8 @@
package gozstd
/*
#cgo CFLAGS: -O3
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
@@ -86,7 +88,10 @@ func BuildDict(samples [][]byte, desiredDictLen int) []byte {
&samplesSizes[0],
C.unsigned(len(samplesSizes)))
buildDictLock.Unlock()
ensureNoError("ZDICT_trainFromBuffer", result)
if C.ZDICT_isError(result) != 0 {
// Return empty dictionary, since the original samples are too small.
return nil
}
dictLen := int(result)
return dict[:dictLen]

View File

@@ -1 +1,3 @@
module github.com/valyala/gozstd
go 1.12

View File

@@ -66,72 +66,55 @@ func CompressDict(dst, src []byte, cd *CDict) []byte {
}
func compressDictLevel(dst, src []byte, cd *CDict, compressionLevel int) []byte {
compressInitOnce.Do(compressInit)
concurrencyLimitCh <- struct{}{}
var cctx, cctxDict *cctxWrapper
if cd == nil {
cctx = cctxPool.Get().(*cctxWrapper)
} else {
cctxDict = cctxDictPool.Get().(*cctxWrapper)
}
dst = compress(cctx, cctxDict, dst, src, cd, compressionLevel)
if cd == nil {
cctxPool.Put(cctx)
} else {
cctxDictPool.Put(cctxDict)
}
<-concurrencyLimitCh
cw := getCompressWork()
cw.dst = dst
cw.src = src
cw.cd = cd
cw.compressionLevel = compressionLevel
compressWorkCh <- cw
<-cw.done
dst = cw.dst
putCompressWork(cw)
return dst
}
func getCompressWork() *compressWork {
v := compressWorkPool.Get()
if v == nil {
v = &compressWork{
done: make(chan struct{}),
}
}
return v.(*compressWork)
var cctxPool = &sync.Pool{
New: newCCtx,
}
func putCompressWork(cw *compressWork) {
cw.src = nil
cw.dst = nil
cw.cd = nil
cw.compressionLevel = 0
compressWorkPool.Put(cw)
var cctxDictPool = &sync.Pool{
New: newCCtx,
}
type compressWork struct {
dst []byte
src []byte
cd *CDict
compressionLevel int
done chan struct{}
}
var (
compressWorkCh chan *compressWork
compressWorkPool sync.Pool
compressInitOnce sync.Once
)
func compressInit() {
gomaxprocs := runtime.GOMAXPROCS(-1)
compressWorkCh = make(chan *compressWork, gomaxprocs)
for i := 0; i < gomaxprocs; i++ {
go compressWorker()
}
}
func compressWorker() {
func newCCtx() interface{} {
cctx := C.ZSTD_createCCtx()
cctxDict := C.ZSTD_createCCtx()
for cw := range compressWorkCh {
cw.dst = compress(cctx, cctxDict, cw.dst, cw.src, cw.cd, cw.compressionLevel)
cw.done <- struct{}{}
cw := &cctxWrapper{
cctx: cctx,
}
runtime.SetFinalizer(cw, freeCCtx)
return cw
}
func compress(cctx, cctxDict *C.ZSTD_CCtx, dst, src []byte, cd *CDict, compressionLevel int) []byte {
func freeCCtx(cw *cctxWrapper) {
C.ZSTD_freeCCtx(cw.cctx)
cw.cctx = nil
}
type cctxWrapper struct {
cctx *C.ZSTD_CCtx
}
func compress(cctx, cctxDict *cctxWrapper, dst, src []byte, cd *CDict, compressionLevel int) []byte {
if len(src) == 0 {
return dst
}
@@ -167,9 +150,9 @@ func compress(cctx, cctxDict *C.ZSTD_CCtx, dst, src []byte, cd *CDict, compressi
return dst[:dstLen+compressedSize]
}
func compressInternal(cctx, cctxDict *C.ZSTD_CCtx, dst, src []byte, cd *CDict, compressionLevel int, mustSucceed bool) C.size_t {
func compressInternal(cctx, cctxDict *cctxWrapper, dst, src []byte, cd *CDict, compressionLevel int, mustSucceed bool) C.size_t {
if cd != nil {
result := C.ZSTD_compress_usingCDict_wrapper(cctxDict,
result := C.ZSTD_compress_usingCDict_wrapper(cctxDict.cctx,
C.uintptr_t(uintptr(unsafe.Pointer(&dst[0]))),
C.size_t(cap(dst)),
C.uintptr_t(uintptr(unsafe.Pointer(&src[0]))),
@@ -183,7 +166,7 @@ func compressInternal(cctx, cctxDict *C.ZSTD_CCtx, dst, src []byte, cd *CDict, c
}
return result
}
result := C.ZSTD_compressCCtx_wrapper(cctx,
result := C.ZSTD_compressCCtx_wrapper(cctx.cctx,
C.uintptr_t(uintptr(unsafe.Pointer(&dst[0]))),
C.size_t(cap(dst)),
C.uintptr_t(uintptr(unsafe.Pointer(&src[0]))),
@@ -207,72 +190,56 @@ func Decompress(dst, src []byte) ([]byte, error) {
//
// The given dictionary dd is used for the decompression.
func DecompressDict(dst, src []byte, dd *DDict) ([]byte, error) {
decompressInitOnce.Do(decompressInit)
concurrencyLimitCh <- struct{}{}
var dctx, dctxDict *dctxWrapper
if dd == nil {
dctx = dctxPool.Get().(*dctxWrapper)
} else {
dctxDict = dctxDictPool.Get().(*dctxWrapper)
}
var err error
dst, err = decompress(dctx, dctxDict, dst, src, dd)
if dd == nil {
dctxPool.Put(dctx)
} else {
dctxDictPool.Put(dctxDict)
}
<-concurrencyLimitCh
dw := getDecompressWork()
dw.dst = dst
dw.src = src
dw.dd = dd
decompressWorkCh <- dw
<-dw.done
dst = dw.dst
err := dw.err
putDecompressWork(dw)
return dst, err
}
func getDecompressWork() *decompressWork {
v := decompressWorkPool.Get()
if v == nil {
v = &decompressWork{
done: make(chan struct{}),
}
}
return v.(*decompressWork)
var dctxPool = &sync.Pool{
New: newDCtx,
}
func putDecompressWork(dw *decompressWork) {
dw.dst = nil
dw.src = nil
dw.dd = nil
dw.err = nil
decompressWorkPool.Put(dw)
var dctxDictPool = &sync.Pool{
New: newDCtx,
}
type decompressWork struct {
dst []byte
src []byte
dd *DDict
err error
done chan struct{}
}
var (
decompressWorkCh chan *decompressWork
decompressWorkPool sync.Pool
decompressInitOnce sync.Once
)
func decompressInit() {
gomaxprocs := runtime.GOMAXPROCS(-1)
decompressWorkCh = make(chan *decompressWork, gomaxprocs)
for i := 0; i < gomaxprocs; i++ {
go decompressWorker()
}
}
func decompressWorker() {
func newDCtx() interface{} {
dctx := C.ZSTD_createDCtx()
dctxDict := C.ZSTD_createDCtx()
for dw := range decompressWorkCh {
dw.dst, dw.err = decompress(dctx, dctxDict, dw.dst, dw.src, dw.dd)
dw.done <- struct{}{}
dw := &dctxWrapper{
dctx: dctx,
}
runtime.SetFinalizer(dw, freeDCtx)
return dw
}
func decompress(dctx, dctxDict *C.ZSTD_DCtx, dst, src []byte, dd *DDict) ([]byte, error) {
func freeDCtx(dw *dctxWrapper) {
C.ZSTD_freeDCtx(dw.dctx)
dw.dctx = nil
}
type dctxWrapper struct {
dctx *C.ZSTD_DCtx
}
func decompress(dctx, dctxDict *dctxWrapper, dst, src []byte, dd *DDict) ([]byte, error) {
if len(src) == 0 {
return dst, nil
}
@@ -325,17 +292,17 @@ func decompress(dctx, dctxDict *C.ZSTD_DCtx, dst, src []byte, dd *DDict) ([]byte
return dst[:dstLen], fmt.Errorf("decompression error: %s", errStr(result))
}
func decompressInternal(dctx, dctxDict *C.ZSTD_DCtx, dst, src []byte, dd *DDict) C.size_t {
func decompressInternal(dctx, dctxDict *dctxWrapper, dst, src []byte, dd *DDict) C.size_t {
var n C.size_t
if dd != nil {
n = C.ZSTD_decompress_usingDDict_wrapper(dctxDict,
n = C.ZSTD_decompress_usingDDict_wrapper(dctxDict.dctx,
C.uintptr_t(uintptr(unsafe.Pointer(&dst[0]))),
C.size_t(cap(dst)),
C.uintptr_t(uintptr(unsafe.Pointer(&src[0]))),
C.size_t(len(src)),
dd.p)
} else {
n = C.ZSTD_decompressDCtx_wrapper(dctx,
n = C.ZSTD_decompressDCtx_wrapper(dctx.dctx,
C.uintptr_t(uintptr(unsafe.Pointer(&dst[0]))),
C.size_t(cap(dst)),
C.uintptr_t(uintptr(unsafe.Pointer(&src[0]))),
@@ -347,6 +314,11 @@ func decompressInternal(dctx, dctxDict *C.ZSTD_DCtx, dst, src []byte, dd *DDict)
return n
}
var concurrencyLimitCh = func() chan struct{} {
gomaxprocs := runtime.GOMAXPROCS(-1)
return make(chan struct{}, gomaxprocs)
}()
func errStr(result C.size_t) string {
errCode := C.ZSTD_getErrorCode(result)
errCStr := C.ZSTD_getErrorString(errCode)

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -164,14 +164,18 @@ func (zw *Writer) ReadFrom(r io.Reader) (int64, error) {
// Fill the inBuf.
for zw.inBuf.size < cstreamInBufSize {
n, err := r.Read(zw.inBufGo[zw.inBuf.size:cstreamInBufSize])
// Sometimes n > 0 even when Read() returns an error.
// This is true especially if the error is io.EOF.
zw.inBuf.size += C.size_t(n)
nn += int64(n)
if err != nil {
if err == io.EOF {
return nn, nil
}
return nn, err
}
zw.inBuf.size += C.size_t(n)
nn += int64(n)
}
// Flush the inBuf.

View File

@@ -94,6 +94,8 @@ typedef struct {
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */
unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */
ZDICT_params_t zParams;
} ZDICT_cover_params_t;
@@ -105,6 +107,9 @@ typedef struct {
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */
unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */
unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */
unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */
ZDICT_params_t zParams;
} ZDICT_fastCover_params_t;

View File

@@ -71,7 +71,7 @@ extern "C" {
/*------ Version ------*/
#define ZSTD_VERSION_MAJOR 1
#define ZSTD_VERSION_MINOR 4
#define ZSTD_VERSION_RELEASE 0
#define ZSTD_VERSION_RELEASE 2
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */
@@ -82,16 +82,16 @@ ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library v
#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */
/***************************************
* Default constant
***************************************/
/* *************************************
* Default constant
***************************************/
#ifndef ZSTD_CLEVEL_DEFAULT
# define ZSTD_CLEVEL_DEFAULT 3
#endif
/***************************************
* Constants
***************************************/
/* *************************************
* Constants
***************************************/
/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */
#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */
@@ -183,9 +183,14 @@ ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compres
***************************************/
/*= Compression context
* When compressing many times,
* it is recommended to allocate a context just once, and re-use it for each successive compression operation.
* it is recommended to allocate a context just once,
* and re-use it for each successive compression operation.
* This will make workload friendlier for system's memory.
* Use one context per thread for parallel execution in multi-threaded environments. */
* Note : re-using context is just a speed / resource optimization.
* It doesn't change the compression ratio, which remains identical.
* Note 2 : In multi-threaded environments,
* use one different context per thread for parallel execution.
*/
typedef struct ZSTD_CCtx_s ZSTD_CCtx;
ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
@@ -380,6 +385,7 @@ typedef enum {
* ZSTD_c_forceMaxWindow
* ZSTD_c_forceAttachDict
* ZSTD_c_literalCompressionMode
* ZSTD_c_targetCBlockSize
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change.
@@ -389,6 +395,7 @@ typedef enum {
ZSTD_c_experimentalParam3=1000,
ZSTD_c_experimentalParam4=1001,
ZSTD_c_experimentalParam5=1002,
ZSTD_c_experimentalParam6=1003,
} ZSTD_cParameter;
typedef struct {
@@ -657,17 +664,33 @@ ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
ZSTD_inBuffer* input,
ZSTD_EndDirective endOp);
ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */
ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */
/*******************************************************************************
* This is a legacy streaming API, and can be replaced by ZSTD_CCtx_reset() and
* ZSTD_compressStream2(). It is redundant, but is still fully supported.
/* These buffer sizes are softly recommended.
* They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output.
* Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(),
* reducing the amount of memory shuffling and buffering, resulting in minor performance savings.
*
* However, note that these recommendations are from the perspective of a C caller program.
* If the streaming interface is invoked from some other language,
* especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo,
* a major performance rule is to reduce crossing such interface to an absolute minimum.
* It's not rare that performance ends being spent more into the interface, rather than compression itself.
* In which cases, prefer using large buffers, as large as practical,
* for both input and output, to reduce the nb of roundtrips.
*/
ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */
ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */
/* *****************************************************************************
* This following is a legacy streaming API.
* It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
* It is redundant, but remains fully supported.
* Advanced parameters and dictionary compression can only be used through the
* new API.
******************************************************************************/
/**
/*!
* Equivalent to:
*
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
@@ -675,16 +698,16 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output
* ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
*/
ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
/**
/*!
* Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
* NOTE: The return value is different. ZSTD_compressStream() returns a hint for
* the next read size (if non-zero and not an error). ZSTD_compressStream2()
* returns the number of bytes left to flush (if non-zero and not an error).
* returns the minimum nb of bytes left to flush (if non-zero and not an error).
*/
ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
@@ -969,7 +992,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
#endif /* ZSTD_H_235446 */
/****************************************************************************************
/* **************************************************************************************
* ADVANCED AND EXPERIMENTAL FUNCTIONS
****************************************************************************************
* The definitions in the following section are considered experimental.
@@ -1037,6 +1060,10 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
#define ZSTD_LDM_HASHRATELOG_MIN 0
#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
/* Advanced parameter bounds */
#define ZSTD_TARGETCBLOCKSIZE_MIN 64
#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
/* internal */
#define ZSTD_HASHLOG3_MAX 17
@@ -1162,7 +1189,7 @@ typedef enum {
* however it does mean that all frame data must be present and valid. */
ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
/** ZSTD_decompressBound() :
/*! ZSTD_decompressBound() :
* `src` should point to the start of a series of ZSTD encoded and/or skippable frames
* `srcSize` must be the _exact_ size of this series
* (i.e. there should be a frame boundary at `src + srcSize`)
@@ -1409,6 +1436,11 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
*/
#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
/* Tries to fit compressed block size to be around targetCBlockSize.
* No target when targetCBlockSize == 0.
* There is no guarantee on compressed block size (default:0) */
#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value.
@@ -1843,7 +1875,7 @@ typedef struct {
unsigned checksumFlag;
} ZSTD_frameHeader;
/** ZSTD_getFrameHeader() :
/*! ZSTD_getFrameHeader() :
* decode Frame Header, or requires larger `srcSize`.
* @return : 0, `zfhPtr` is correctly filled,
* >0, `srcSize` is too small, value is wanted `srcSize` amount,

2
vendor/modules.txt vendored
View File

@@ -18,7 +18,7 @@ github.com/valyala/bytebufferpool
github.com/valyala/fastjson/fastfloat
# github.com/valyala/fastrand v1.0.0
github.com/valyala/fastrand
# github.com/valyala/gozstd v1.5.1
# github.com/valyala/gozstd v1.6.0
github.com/valyala/gozstd
# github.com/valyala/histogram v1.0.1
github.com/valyala/histogram