Files
VictoriaMetrics/app/vlinsert/insertutil/line_reader.go
Aliaksandr Valialkin 63dccea932 app/vlinsert/journald: parse journald logs in streaming manner
This allows parsing unlimited number of logs in a single HTTP request,
without the need to buffer the logs in memory.

This is needed for https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9070

Thanks to @AndrewChubatiuk for the initial pull request - https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9153

This commit is based on the https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9153 .
It contains the following changes comparing to the original pull request:

- Remove ugly function LineReader.NextLineWithLineFn(). Instead, uglify the Journald parser a bit
  with hacky calls to LineReader.NextLine() in order to parse binary-encoded field values.
  This should preserve the maintainability of the LineReader, which is shared among multiple protocol parsers,
  under control, while keeping the complexity of Journald parsing inside the app/vlinsert/journald package.

- Fix a typo bug inside isNameValid() - `(r < '0' && r > '9')` must be written as `(r < '0' || r > '9')`.
  Rewrite isNameValid() into easier to understand code and rename it to isValidJournaldFieldName() for better readability.
  Add tests for this function.

- Remove mentioning of the -journald.maxRequestSize command-line flag from VictoriaLogs docs.

- Add the description of the fix to VictoriaLogs changelog.

- Properly increment errorsTotal metric on every journald parse error.

- Add missing protoparserutil.PutUncompressedReader(reader) call, so the reader could be re-used between client requests.

- Remove improperly working code, which tries continuing parsing the request stream after parse errors.
  It is impossible to recover reliably from journald parse errors related to reading the data from the request stream,
  since the journald protocol format is completely braindead. So it is better to immediately return the error
  to the client instead of trying to recover. The only errors, which could be recovered, are related to invalid field names / values.
  Such errors are logged with the WARN level and the corresponding fields are skipped.

- Fix incorrect storage of the re-used name and value strings into fb.fields. The contents of the name and value strings
  must be copied per every loop, which reads these strings from the request stream. Otherwise the contents of the previously
  added Name and Value fields into fb.fields will be overwritten on the next loop.

- Ensure that LineReader.Line is set to nil after LineReader.NextLine() returns false. This should prevent from subtle bugs
  when the LineReader.Line is read after LineReader.NextLine() returns false.
2025-06-18 23:48:22 +02:00

159 lines
4.0 KiB
Go

package insertutil
import (
"bytes"
"errors"
"fmt"
"io"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
)
// LineReader reads newline-delimited lines from the underlying reader
type LineReader struct {
// Line contains the next line read after the call to NextLine
//
// The Line contents is valid until the next call to NextLine.
Line []byte
// name is the LineReader name
name string
// r is the underlying reader to read data from
r io.Reader
// buf is a buffer for reading the next line
buf []byte
// bufOffset is the offset at buf to read the next line from
bufOffset int
// err is the last error when reading data from r
err error
// eofReached is set to true when all the data is read from r
eofReached bool
}
// NewLineReader returns LineReader for r.
func NewLineReader(name string, r io.Reader) *LineReader {
return &LineReader{
name: name,
r: r,
}
}
// NextLine reads the next line from the underlying reader.
//
// It returns true if the next line is successfully read into Line.
// If the line length exceeds MaxLineSizeBytes, then this line is skipped
// and an empty line is returned instead.
//
// If false is returned, then no more lines left to read from r.
// Check for Err in this case.
func (lr *LineReader) NextLine() bool {
for {
lr.Line = nil
if lr.bufOffset >= len(lr.buf) {
if lr.err != nil || lr.eofReached {
return false
}
if !lr.readMoreData() {
return false
}
if lr.bufOffset >= len(lr.buf) && lr.eofReached {
return false
}
}
buf := lr.buf[lr.bufOffset:]
if n := bytes.IndexByte(buf, '\n'); n >= 0 {
lr.Line = buf[:n]
lr.bufOffset += n + 1
return true
}
if lr.eofReached {
lr.Line = buf
lr.bufOffset += len(buf)
return true
}
if !lr.readMoreData() {
return false
}
}
}
// Err returns the last error after NextLine call.
func (lr *LineReader) Err() error {
if lr.err == nil {
return nil
}
return fmt.Errorf("%s: %s", lr.name, lr.err)
}
func (lr *LineReader) readMoreData() bool {
if lr.bufOffset > 0 {
lr.buf = append(lr.buf[:0], lr.buf[lr.bufOffset:]...)
lr.bufOffset = 0
}
bufLen := len(lr.buf)
if bufLen >= MaxLineSizeBytes.IntN() {
ok, skippedBytes := lr.skipUntilNextLine()
logger.Warnf("%s: the line length exceeds -insert.maxLineSizeBytes=%d; skipping it; total skipped bytes=%d",
lr.name, MaxLineSizeBytes.IntN(), skippedBytes)
tooLongLinesSkipped.Inc()
return ok
}
lr.buf = slicesutil.SetLength(lr.buf, MaxLineSizeBytes.IntN())
n, err := lr.r.Read(lr.buf[bufLen:])
lr.buf = lr.buf[:bufLen+n]
if err != nil {
if errors.Is(err, io.EOF) {
lr.eofReached = true
return true
}
lr.err = fmt.Errorf("cannot read the next line: %s", err)
}
return n > 0
}
var tooLongLinesSkipped = metrics.NewCounter("vl_too_long_lines_skipped_total")
func (lr *LineReader) skipUntilNextLine() (bool, int) {
// Initialize skipped bytes count with MaxLineSizeBytes because
// we've already read that many bytes without encountering a newline,
// indicating the line size exceeds the maximum allowed limit.
skipSizeBytes := MaxLineSizeBytes.IntN()
for {
lr.buf = slicesutil.SetLength(lr.buf, MaxLineSizeBytes.IntN())
n, err := lr.r.Read(lr.buf)
skipSizeBytes += n
lr.buf = lr.buf[:n]
if err != nil {
if errors.Is(err, io.EOF) {
lr.eofReached = true
lr.buf = lr.buf[:0]
return true, skipSizeBytes
}
lr.err = fmt.Errorf("cannot skip the current line: %s", err)
return false, skipSizeBytes
}
if n := bytes.IndexByte(lr.buf, '\n'); n >= 0 {
// Include skipped bytes before \n, including the newline itself.
skipSizeBytes += n + 1 - len(lr.buf)
// Include \n in the buf, so too long line is replaced with an empty line.
// This is needed for maintaining synchorinzation consistency between lines
// in protocols such as Elasticsearch bulk import.
lr.buf = append(lr.buf[:0], lr.buf[n:]...)
return true, skipSizeBytes
}
}
}