Files
VictoriaMetrics/app/vlinsert/insertutil/line_reader_test.go
Aliaksandr Valialkin 63dccea932 app/vlinsert/journald: parse journald logs in streaming manner
This allows parsing unlimited number of logs in a single HTTP request,
without the need to buffer the logs in memory.

This is needed for https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9070

Thanks to @AndrewChubatiuk for the initial pull request - https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9153

This commit is based on the https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9153 .
It contains the following changes compared to the original pull request:

- Remove ugly function LineReader.NextLineWithLineFn(). Instead, uglify the Journald parser a bit
  with hacky calls to LineReader.NextLine() in order to parse binary-encoded field values.
  This should keep the maintainability of the LineReader, which is shared among multiple protocol parsers,
  under control, while keeping the complexity of Journald parsing inside the app/vlinsert/journald package.

- Fix a typo bug inside isNameValid() - `(r < '0' && r > '9')` must be written as `(r < '0' || r > '9')`.
  Rewrite isNameValid() into easier to understand code and rename it to isValidJournaldFieldName() for better readability.
  Add tests for this function.

- Remove the mention of the -journald.maxRequestSize command-line flag from VictoriaLogs docs.

- Add the description of the fix to VictoriaLogs changelog.

- Properly increment errorsTotal metric on every journald parse error.

- Add missing protoparserutil.PutUncompressedReader(reader) call, so the reader could be re-used between client requests.

- Remove improperly working code, which tries continuing parsing the request stream after parse errors.
  It is impossible to recover reliably from journald parse errors related to reading the data from the request stream,
  since the journald protocol format is completely braindead. So it is better to immediately return the error
  to the client instead of trying to recover. The only errors that can be recovered from are those related to invalid field names / values.
  Such errors are logged with the WARN level and the corresponding fields are skipped.

- Fix incorrect storage of the re-used name and value strings into fb.fields. The contents of the name and value strings
  must be copied on every iteration of the loop that reads these strings from the request stream. Otherwise the contents of the
  Name and Value fields previously added to fb.fields will be overwritten on the next iteration.

- Ensure that LineReader.Line is set to nil after LineReader.NextLine() returns false. This should prevent subtle bugs
  when LineReader.Line is read after LineReader.NextLine() returns false.
2025-06-18 23:48:22 +02:00

165 lines
4.0 KiB
Go

package insertutil
import (
"bytes"
"fmt"
"io"
"reflect"
"testing"
)
// TestLineReader_Success feeds error-free input to a LineReader and checks
// that the lines it yields match the expected ones.
func TestLineReader_Success(t *testing.T) {
	f := func(data string, linesExpected []string) {
		t.Helper()

		lr := NewLineReader("foo", bytes.NewBufferString(data))

		// Drain the reader; string(lr.Line) takes a copy of every yielded line.
		var got []string
		for {
			if !lr.NextLine() {
				break
			}
			got = append(got, string(lr.Line))
		}

		err := lr.Err()
		if err != nil {
			t.Fatalf("unexpected error: %s", err)
		}

		// A drained reader must keep returning false and must not expose a stale line.
		if lr.NextLine() {
			t.Fatalf("expecting error on the second call to NextLine()")
		}
		if len(lr.Line) != 0 {
			t.Fatalf("unexpected non-empty line after failed NextLine(): %q", lr.Line)
		}

		if !reflect.DeepEqual(got, linesExpected) {
			t.Fatalf("unexpected lines\ngot\n%q\nwant\n%q", got, linesExpected)
		}
	}

	// Empty input and newline-only input.
	f("", nil)
	f("\n", []string{""})
	f("\n\n", []string{"", ""})

	// A single line with and without the trailing newline.
	f("foo", []string{"foo"})
	f("foo\n", []string{"foo"})

	// Empty lines mixed with non-empty ones.
	f("\nfoo", []string{"", "foo"})
	f("foo\n\n", []string{"foo", ""})

	// Multiple lines.
	f("foo\nbar", []string{"foo", "bar"})
	f("foo\nbar\n", []string{"foo", "bar"})
	f("\nfoo\n\nbar\n\n", []string{"", "foo", "", "bar", ""})
}
// TestLineReader_SkipUntilNextLine checks how LineReader is expected to treat
// lines of MaxLineSizeBytes bytes or more: a newline-terminated long line must
// be yielded as an empty line, while an unterminated long line at the end of
// the input must be dropped. No error is expected in either case.
func TestLineReader_SkipUntilNextLine(t *testing.T) {
	f := func(data string, linesExpected []string) {
		t.Helper()

		lr := NewLineReader("foo", bytes.NewBufferString(data))

		var got []string
		for {
			if !lr.NextLine() {
				break
			}
			got = append(got, string(lr.Line))
		}

		err := lr.Err()
		if err != nil {
			t.Fatalf("unexpected error for data=%q: %s", data, err)
		}

		// A drained reader must keep returning false.
		if lr.NextLine() {
			t.Fatalf("expecting error on the second call to NextLine()")
		}

		if !reflect.DeepEqual(got, linesExpected) {
			t.Fatalf("unexpected lines for data=%q\ngot\n%q\nwant\n%q", data, got, linesExpected)
		}
	}

	maxLen := MaxLineSizeBytes.IntN()
	overflows := []int{0, 100, maxLen, maxLen + 1, 2 * maxLen}
	for _, overflow := range overflows {
		// The long line consists of maxLen+overflow zero bytes.
		longLine := string(make([]byte, maxLen+overflow))

		// Single long line
		f(longLine, nil)

		// Multiple long lines
		f(longLine+"\n"+longLine, []string{""})
		f(longLine+"\n"+longLine+"\n", []string{"", ""})

		// Long line in the middle
		f("foo\n"+longLine+"\nbar", []string{"foo", "", "bar"})

		// Multiple long lines in the middle
		f("foo\n"+longLine+"\n"+longLine+"\nbar", []string{"foo", "", "", "bar"})

		// Unterminated long line at the end
		f("foo\n"+longLine, []string{"foo"})

		// Newline-terminated long line at the end
		f("foo\n"+longLine+"\n", []string{"foo", ""})
	}
}
// TestLineReader_Failure reads the input through failureReader, which reports
// an error once the underlying data is exhausted. It checks that the lines
// yielded before the failure match the expected ones and that Err() stays
// non-nil afterwards.
func TestLineReader_Failure(t *testing.T) {
	f := func(data string, linesExpected []string) {
		t.Helper()

		src := &failureReader{
			r: bytes.NewBufferString(data),
		}
		lr := NewLineReader("foo", src)

		var got []string
		for {
			if !lr.NextLine() {
				break
			}
			got = append(got, string(lr.Line))
		}

		if lr.Err() == nil {
			t.Fatalf("expecting non-nil error")
		}

		// The failure must be sticky: NextLine keeps returning false and Err keeps returning the error.
		if lr.NextLine() {
			t.Fatalf("expecting error on the second call to NextLine()")
		}
		if lr.Err() == nil {
			t.Fatalf("expecting non-nil error on the second call")
		}

		if !reflect.DeepEqual(got, linesExpected) {
			t.Fatalf("unexpected lines\ngot\n%q\nwant\n%q", got, linesExpected)
		}
	}

	// Only newline-terminated lines are expected; an unterminated tail is not yielded.
	f("", nil)
	f("foo", nil)
	f("foo\n", []string{"foo"})
	f("\n", []string{""})
	f("foo\nbar", []string{"foo"})
	f("foo\nbar\n", []string{"foo", "bar"})
	f("\nfoo\nbar\n\n", []string{"", "foo", "bar", ""})

	// long line
	maxLen := MaxLineSizeBytes.IntN()
	overflows := []int{0, 100, maxLen, maxLen + 1, 2 * maxLen}
	for _, overflow := range overflows {
		// The long line consists of maxLen+overflow zero bytes.
		longLine := string(make([]byte, maxLen+overflow))

		f(longLine, nil)
		f("foo\n"+longLine, []string{"foo"})
		f(longLine+"\nfoo", []string{""})
		f(longLine+"\nfoo\n", []string{"", "foo"})
	}
}
// failureReader is a test helper, which passes through the data read from r
// and returns an error as soon as r yields no data.
type failureReader struct {
	// r is the wrapped data source.
	r io.Reader
}

// Read reads from the wrapped reader into p.
//
// It returns the number of bytes read and a nil error when at least one byte
// has been read. Otherwise it returns zero and the "some error" error.
func (fr *failureReader) Read(p []byte) (int, error) {
	// The error from the wrapped reader is deliberately dropped:
	// only the number of bytes read decides the outcome.
	if n, _ := fr.r.Read(p); n > 0 {
		return n, nil
	}
	return 0, fmt.Errorf("some error")
}