package stringsutil import ( "slices" "sync" "unicode" "unicode/utf8" "unsafe" ) // LimitStringLen limits the length of s with maxLen. // // If len(s) > maxLen, then s is replaced with "s_prefix..s_suffix", // so the total length of the returned string doesn't exceed maxLen. func LimitStringLen(s string, maxLen int) string { if maxLen < 4 { maxLen = 4 } if len(s) <= maxLen { return s } n := (maxLen / 2) - 1 return s[:n] + ".." + s[len(s)-n:] } // AppendLowercase appends lowercase s to dst and returns the result. // It is recommended to use ToLowercaseFunc if possible to avoid copying of s. func AppendLowercase(dst []byte, s string) []byte { // Try to find the first uppercase character. n := uppercaseIndex(s) if n < 0 { // Fast path: no uppercase characters found. dst = append(dst, s...) return dst } // Slow path: convert s to lowercase. dst = slices.Grow(dst, len(s)) dst = append(dst, s[:n]...) s = s[n:] return appendLowercaseInternal(dst, s) } func appendLowercaseInternal(dst []byte, s string) []byte { dstLen := len(dst) // Try fast path at first by assuming that s contains only ASCII chars. hasUnicodeChars := false for i := range len(s) { c := s[i] if c >= utf8.RuneSelf { hasUnicodeChars = true break } if c >= 'A' && c <= 'Z' { c += 'a' - 'A' } dst = append(dst, c) } if hasUnicodeChars { // Slow path - s contains non-ASCII chars. Use Unicode encoding. dst = dst[:dstLen] for _, r := range s { r = unicode.ToLower(r) dst = utf8.AppendRune(dst, r) } } return dst } // ToLowercaseFunc calls f with a lowercase version of s. // The resulting value is only valid during the f call. func ToLowercaseFunc(s string, f func(s string)) { // Try to find the first uppercase character. n := uppercaseIndex(s) if n < 0 { // Fast path: no uppercase characters found. f(s) return } sb := getStringBuilder() defer putStringBuilder(sb) sb.buf = slices.Grow(sb.buf, len(s)) sb.appendString(s[:n]) sb.buf = appendLowercaseInternal(sb.buf, s[n:]) f(sb.string()) } // IsLowercase returns true if the given string does not contain uppercase characters. func IsLowercase(s string) bool { return uppercaseIndex(s) < 0 } // uppercaseIndex returns the index of the first uppercase character in s, // or -1 if s does not contain uppercase characters. func uppercaseIndex(s string) int { idx := 0 // Fast path for ASCII-only strings - process 8 bytes at a time. for idx <= len(s)-8 { v := uint64FromString(s[idx:]) // ASCII characters have the 8th bit clear. // The operation bellow is the same as s[idx] < utf8.RuneSelf, but for multiple bytes. if isASCII := v&0x8080808080808080 == 0; !isASCII { break } // Check if any byte lacks the 6th bit, which indicates uppercase symbol or '@', '[', '\', ']', '^', '_'. mightHaveUpper := ^v&0x2020202020202020 != 0 if mightHaveUpper { for j := 0; j < 8; j++ { c := s[idx+j] if c >= 'A' && c <= 'Z' { return idx + j } } } idx += 8 } // Handle the rest of the s. for idx < len(s) { if c := s[idx]; c < utf8.RuneSelf { if c >= 'A' && c <= 'Z' { return idx } idx++ continue } r, size := utf8.DecodeRuneInString(s[idx:]) if r != unicode.ToLower(r) { return idx } idx += size } return -1 } // uint64FromString interprets the first 8 bytes of string b as a little-endian uint64. // The same as binary.LittleEndian.Uint64, but operates on strings. // // This function is a bit slower than (*uint64)(unsafe.Pointer(ptr)) alternative, // but does not have the issue with data alignment. See: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3927 func uint64FromString(b string) uint64 { _ = b[7] // bounds check hint to compiler; see golang.org/issue/14808 return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 } type stringBuilder struct { buf []byte } func (sb *stringBuilder) appendString(s string) { sb.buf = append(sb.buf, s...) } func (sb *stringBuilder) reset() { sb.buf = sb.buf[:0] } func (sb *stringBuilder) string() string { return unsafe.String(unsafe.SliceData(sb.buf), len(sb.buf)) } var stringBuilderPool = sync.Pool{ New: func() any { return &stringBuilder{} }, } func getStringBuilder() *stringBuilder { return stringBuilderPool.Get().(*stringBuilder) } func putStringBuilder(sb *stringBuilder) { sb.reset() stringBuilderPool.Put(sb) }