lib/logstorage: initial implementation of pipes in LogsQL

See https://docs.victoriametrics.com/victorialogs/logsql/#pipes
This commit is contained in:
Aliaksandr Valialkin
2024-05-12 16:33:29 +02:00
parent e66465cb03
commit 9dbd0f9085
119 changed files with 24178 additions and 14059 deletions

View File

@@ -1,5 +1,10 @@
package stringsutil
import (
"unicode"
"unicode/utf8"
)
// LimitStringLen limits the length of s with maxLen.
//
// If len(s) > maxLen, then s is replaced with "s_prefix..s_suffix",
@@ -14,3 +19,33 @@ func LimitStringLen(s string, maxLen int) string {
n := (maxLen / 2) - 1
return s[:n] + ".." + s[len(s)-n:]
}
// AppendLowercase appends the lowercased form of s to dst and returns the extended slice.
//
// It is a faster alternative to strings.ToLower, since the result is written
// into the caller-supplied buffer.
func AppendLowercase(dst []byte, s string) []byte {
	origLen := len(dst)

	// Optimistically treat s as pure ASCII and lowercase it byte by byte.
	for pos := 0; pos < len(s); pos++ {
		b := s[pos]
		if b < utf8.RuneSelf {
			if 'A' <= b && b <= 'Z' {
				b += 'a' - 'A'
			}
			dst = append(dst, b)
			continue
		}

		// A non-ASCII byte has been found. Drop everything appended so far
		// and lowercase the whole string rune by rune instead.
		dst = dst[:origLen]
		for _, r := range s {
			dst = utf8.AppendRune(dst, unicode.ToLower(r))
		}
		return dst
	}
	return dst
}

View File

@@ -22,3 +22,19 @@ func TestLimitStringLen(t *testing.T) {
f("abcde", 4, "a..e")
f("abcde", 5, "abcde")
}
// TestAppendLowercase verifies that AppendLowercase lowercases ASCII and
// non-ASCII input, both when appending to a nil dst and when appending
// to a dst that already holds data.
func TestAppendLowercase(t *testing.T) {
	f := func(s, resultExpected string) {
		t.Helper()

		result := AppendLowercase(nil, s)
		if string(result) != resultExpected {
			t.Fatalf("unexpected result; got %q; want %q", result, resultExpected)
		}

		// The existing dst contents must be preserved as is (they must not be lowercased
		// or truncated), including the case when the ASCII fast path is abandoned
		// in the middle of s and the partially appended data is discarded.
		const prefix = "PreFix "
		result = AppendLowercase([]byte(prefix), s)
		if want := prefix + resultExpected; string(result) != want {
			t.Fatalf("unexpected result when appending to non-empty dst; got %q; want %q", result, want)
		}
	}

	f("", "")
	f("foo", "foo")
	f("FOO", "foo")
	f("foo БаР baz 123", "foo бар baz 123")

	// Uppercase ASCII chars in front of the first non-ASCII char
	f("Foo БаР", "foo бар")

	// Non-ASCII char at the very beginning
	f("БаР FOO", "бар foo")

	// Invalid UTF-8 is replaced with the Unicode replacement char, as strings.ToLower does
	f("A\xffB", "a\ufffdb")
}

View File

@@ -0,0 +1,96 @@
package stringsutil
import (
"strings"
"sync/atomic"
"testing"
)
// BenchmarkAppendLowercase measures AppendLowercase on ASCII and non-ASCII inputs
// containing no, some, or only uppercase letters.
func BenchmarkAppendLowercase(b *testing.B) {
	benchCases := []struct {
		name   string
		inputs []string
	}{
		{"ascii-all-lowercase", []string{"foo bar baz abc def", "23k umlkds", "lq, poweri2349)"}},
		{"ascii-some-uppercase", []string{"Foo Bar baz ABC def", "23k umlKDs", "lq, Poweri2349)"}},
		{"ascii-all-uppercase", []string{"FOO BAR BAZ ABC DEF", "23K UMLKDS", "LQ, POWERI2349)"}},
		{"unicode-all-lowercase", []string{"хщцукодл длобючф дл", "23и юбывлц", "лф, длощшу2349)"}},
		{"unicode-some-uppercase", []string{"Хщцукодл Длобючф ДЛ", "23и юбыВЛц", "лф, Длощшу2349)"}},
		{"unicode-all-uppercase", []string{"ХЩЦУКОДЛ ДЛОБЮЧФ ДЛ", "23И ЮБЫВЛЦ", "ЛФ, ДЛОЩШУ2349)"}},
	}
	for i := range benchCases {
		inputs := benchCases[i].inputs
		b.Run(benchCases[i].name, func(b *testing.B) {
			benchmarkAppendLowercase(b, inputs)
		})
	}
}
// benchmarkAppendLowercase runs AppendLowercase over every string in a from parallel goroutines.
//
// The reported throughput is based on the total byte length of the strings in a.
// Every goroutine reuses its own buffer across iterations and adds the total number
// of produced bytes to GlobalSink when it finishes.
func benchmarkAppendLowercase(b *testing.B, a []string) {
	var totalBytes int
	for i := range a {
		totalBytes += len(a[i])
	}

	b.ReportAllocs()
	b.SetBytes(int64(totalBytes))
	b.RunParallel(func(pb *testing.PB) {
		var (
			scratch []byte
			written uint64
		)
		for pb.Next() {
			scratch = scratch[:0]
			for i := range a {
				scratch = AppendLowercase(scratch, a[i])
			}
			written += uint64(len(scratch))
		}
		GlobalSink.Add(written)
	})
}
// BenchmarkStringsToLower measures strings.ToLower on the same inputs as
// BenchmarkAppendLowercase, so the results of the two benchmarks can be compared.
func BenchmarkStringsToLower(b *testing.B) {
	benchCases := []struct {
		name   string
		inputs []string
	}{
		{"ascii-all-lowercase", []string{"foo bar baz abc def", "23k umlkds", "lq, poweri2349)"}},
		{"ascii-some-uppercase", []string{"Foo Bar baz ABC def", "23k umlKDs", "lq, Poweri2349)"}},
		{"ascii-all-uppercase", []string{"FOO BAR BAZ ABC DEF", "23K UMLKDS", "LQ, POWERI2349)"}},
		{"unicode-all-lowercase", []string{"хщцукодл длобючф дл", "23и юбывлц", "лф, длощшу2349)"}},
		{"unicode-some-uppercase", []string{"Хщцукодл Длобючф ДЛ", "23и юбыВЛц", "лф, Длощшу2349)"}},
		{"unicode-all-uppercase", []string{"ХЩЦУКОДЛ ДЛОБЮЧФ ДЛ", "23И ЮБЫВЛЦ", "ЛФ, ДЛОЩШУ2349)"}},
	}
	for i := range benchCases {
		inputs := benchCases[i].inputs
		b.Run(benchCases[i].name, func(b *testing.B) {
			benchmarkStringsToLower(b, inputs)
		})
	}
}
// benchmarkStringsToLower runs strings.ToLower over every string in a from parallel goroutines
// and appends the lowercased strings to a per-goroutine buffer.
//
// The reported throughput is based on the total byte length of the strings in a.
// Every goroutine adds the total number of produced bytes to GlobalSink when it finishes.
func benchmarkStringsToLower(b *testing.B, a []string) {
	var totalBytes int
	for i := range a {
		totalBytes += len(a[i])
	}

	b.ReportAllocs()
	b.SetBytes(int64(totalBytes))
	b.RunParallel(func(pb *testing.PB) {
		var (
			scratch []byte
			written uint64
		)
		for pb.Next() {
			scratch = scratch[:0]
			for i := range a {
				scratch = append(scratch, strings.ToLower(a[i])...)
			}
			written += uint64(len(scratch))
		}
		GlobalSink.Add(written)
	})
}
// GlobalSink accumulates the number of bytes produced by the benchmarks in this file.
//
// Every benchmark goroutine adds its total to it via Add, so the benchmarked results
// are observably used - presumably in order to prevent the compiler from optimizing
// the measured code away.
var GlobalSink atomic.Uint64