cockroachdb · dhartunian · Mar 18, 2026 · Mar 18, 2026
diff --git a/bench_test.go b/bench_test.go
@@ -25,6 +25,61 @@ func BenchmarkRedact(b *testing.B) {
 	}
 }
 
+type benchAddr struct {
+	Host string
+	Port int
+}
+
+func (a benchAddr) SafeFormat(w SafePrinter, _ rune) {
+	w.Printf("%s:%d", a.Host, Safe(a.Port))
+}
+
+type benchRequest struct {
+	Method string
+	Path   string
+	From   benchAddr
+}
+
+func (r benchRequest) SafeFormat(w SafePrinter, _ rune) {
+	w.Printf("%s %s from %v", Safe(r.Method), r.Path, r.From)
+}
+
+func BenchmarkSprintfWithSafeFormatter(b *testing.B) {
+	req := benchRequest{
+		Method: "GET",
+		Path:   "/api/v1/users",
+		From:   benchAddr{Host: "192.168.1.1", Port: 8080},
+	}
+
+	b.Run("single_struct", func(b *testing.B) {
+		addr := benchAddr{Host: "10.0.0.1", Port: 5432}
+		for i := 0; i < b.N; i++ {
+			_ = Sprintf("connecting to %v", addr)
+		}
+	})
+
+	b.Run("nested_structs", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			_ = Sprintf("received %v", req)
+		}
+	})
+
+	b.Run("multiple_structs", func(b *testing.B) {
+		src := benchAddr{Host: "10.0.0.1", Port: 3000}
+		dst := benchAddr{Host: "10.0.0.2", Port: 5432}
+		for i := 0; i < b.N; i++ {
+			_ = Sprintf("proxy %v -> %v for %v", src, dst, req)
+		}
+	})
+
+	b.Run("sprint_mixed", func(b *testing.B) {
+		addr := benchAddr{Host: "10.0.0.1", Port: 5432}
+		for i := 0; i < b.N; i++ {
+			_ = Sprint("request ", req, " via ", addr, " user=", "alice")
+		}
+	})
+}
+
 // BenchmarkRedactCall_PlainMarkers calls .Redact() on a string with only
 // regular ‹...› markers (no hash markers). This is the baseline.
 func BenchmarkRedactCall_RegularRedaction(b *testing.B) {
@@ -57,4 +112,3 @@ func BenchmarkRedactCall_HashWithSalt(b *testing.B) {
 		_ = s.Redact()
 	}
 }
-
diff --git a/internal/escape/escape.go b/internal/escape/escape.go
@@ -35,11 +35,23 @@ func InternalEscapeBytes(b []byte, startLoc int, breakNewLines, strip bool) (res
 	// Note: we use len(...RedactableS) and not len(...RedactableBytes)
 	// because the ...S variant is a compile-time constant so this
 	// accelerates the loops below.
-	start, ls := m.StartBytes, len(m.StartS)
-	end, le := m.EndBytes, len(m.EndS)
-	hashPrefix, lh := m.HashPrefixBytes, len(m.HashPrefixS)
+	start := m.StartBytes
+	ls := len(m.StartS)
+	end := m.EndBytes
+	le := len(m.EndS)
+	hashPrefix := m.HashPrefixBytes
+	lh := len(m.HashPrefixS)
 	escape := m.EscapeMarkBytes
 
+	// All markers share the same lead byte (0xE2) and second byte (0x80).
+	// This invariant is verified by the init() check in markers.go.
+	// We use this to skip non-marker bytes quickly and to tell markers apart by their third byte.
+	lead := start[0]
+	mid := start[1]
+	b2Start := start[2]
+	b2End := end[2]
+	b2Hash := hashPrefix[2]
+
 	// Trim final newlines/spaces, for convenience.
 	if strip {
 		end := len(b)
@@ -64,68 +76,95 @@ func InternalEscapeBytes(b []byte, startLoc int, breakNewLines, strip bool) (res
 	// already copied into res (if copied=true).
 	k := 0
 
-	for i := startLoc; i < len(b); i++ {
-		if breakNewLines && b[i] == '\n' {
+	for i := startLoc; i < len(b); {
+		// Use bytes.IndexByte to skip over runs of bytes that can't
+		// start a marker. The lead byte (0xE2) starts all marker
+		// sequences. When breakNewLines is false, we only need to find
+		// the lead byte. When true, we need to handle newlines too.
+		remaining := b[i:]
+		var idx int
+		if !breakNewLines {
+			idx = bytes.IndexByte(remaining, lead)
+		} else {
+			// Find the first byte that could be interesting: lead or newline.
+			// Use two IndexByte calls and take the minimum.
+			idxLead := bytes.IndexByte(remaining, lead)
+			idxNL := bytes.IndexByte(remaining, '\n')
+			if idxLead < 0 {
+				idx = idxNL
+			} else if idxNL < 0 {
+				idx = idxLead
+			} else if idxLead < idxNL {
+				idx = idxLead
+			} else {
+				idx = idxNL
+			}
+		}
+		if idx < 0 {
+			break
+		}
+		i += idx
+		c := b[i]
+
+		if breakNewLines && c == '\n' {
 			if !copied {
-				// We only allocate an output slice when we know we definitely
-				// need it.
 				res = make([]byte, 0, len(b))
 				copied = true
 			}
 			res = append(res, b[k:i]...)
-			// Either add an end marker, or elide a start marker immediately prior.
+
+			// Close the current redaction section before the newline.
+			// If the last thing we emitted was a start marker, remove
+			// it instead of producing an empty ‹› pair.
 			if bytes.HasSuffix(res, start) {
 				res = res[:len(res)-ls]
 			} else {
 				res = append(res, end...)
 			}
-			// Advance to the last newline character. We want to forward
-			// them all in a single call to doWrite, for performance.
+
+			// Emit all consecutive newlines as-is, outside any
+			// redaction envelope.
 			lastNewLine := i
 			for lastNewLine < len(b) && b[lastNewLine] == '\n' {
 				lastNewLine++
 			}
 			res = append(res, b[i:lastNewLine]...)
+
+			// Reopen the redaction section for content after the
+			// newline(s). The caller will emit the closing marker.
 			res = append(res, start...)
 			k = lastNewLine
-			i = lastNewLine - 1
-		} else
-		// Ensure that occurrences of the delimiter inside the string get
-		// escaped.
-		// Reminder: ls and le are likely greater than 1, as we are scanning
-		// utf-8 encoded delimiters (the utf-8 encoding is multibyte).
-		if i+ls <= len(b) && bytes.Equal(b[i:i+ls], start) {
-			if !copied {
-				// We only allocate an output slice when we know we definitely
-				// need it.
-				res = make([]byte, 0, len(b)+len(escape))
-				copied = true
-			}
-			res = append(res, b[k:i]...)
-			res = append(res, escape...)
-			// Advance the counters by the length (in bytes) of the delimiter.
-			k = i + ls
-			i += ls - 1 /* -1 because we have i++ at the end of every iteration */
-		} else if i+le <= len(b) && bytes.Equal(b[i:i+le], end) {
-			if !copied {
-				// See the comment above about res allocation.
-				res = make([]byte, 0, len(b)+len(escape))
-				copied = true
-			}
-			res = append(res, b[k:i]...)
-			res = append(res, escape...)
-			// Advance the counters by the length (in bytes) of the delimiter.
-			k = i + le
-			i += le - 1 /* -1 because we have i++ at the end of every iteration */
-		} else if i+lh <= len(b) && bytes.Equal(b[i:i+lh], hashPrefix) {
+			i = lastNewLine
+			continue
+		}
+
+		// c == lead (0xE2). Check if we have a full marker.
+		if i+2 >= len(b) || b[i+1] != mid {
+			i++
+			continue
+		}
+
+		b2 := b[i+2]
+		markerLen := 0
+		if b2 == b2Start {
+			markerLen = ls
+		} else if b2 == b2End {
+			markerLen = le
+		} else if b2 == b2Hash {
+			markerLen = lh
+		}
+
+		if markerLen > 0 {
 			if !copied {
 				res = make([]byte, 0, len(b)+len(escape))
 				copied = true
 			}
 			res = append(res, b[k:i]...)
 			res = append(res, escape...)
-			k = i + lh
-			i += lh - 1
+			k = i + markerLen
+			i += markerLen
+		} else {
+			i++
 		}
 	}
 	// If the string terminates with an invalid utf-8 sequence, we

diff --git a/internal/escape/escape_test.go b/internal/escape/escape_test.go
@@ -24,22 +24,95 @@ func TestInternalEscape(t *testing.T) {
 		strip    bool
 		expected string
 	}{
+		// Empty / nil inputs.
 		{nil, 0, false, false, ""},
 		{[]byte(""), 0, false, false, ""},
+
+		// Pure ASCII, no markers.
 		{[]byte("abc"), 0, false, false, "abc"},
+		{[]byte("hello world 12345"), 0, false, false, "hello world 12345"},
+
+		// Start and end marker escaping, with various start offsets.
 		{[]byte("‹abc›"), 0, false, false, "?abc?"},
 		{[]byte("‹abc›"), 3, false, false, "‹abc?"},
 		{[]byte("‹abc›def›ghi"), 3, false, false, "‹abc?def?ghi"},
 		{[]byte("‹abc›"), len([]byte("‹abc›")), false, false, "‹abc›"},
 		{[]byte("‹abc›‹def›"), len([]byte("‹abc›")), false, false, "‹abc›?def?"},
+
+		// Multiple markers in sequence.
+		{[]byte("‹‹‹"), 0, false, false, "???"},
+		{[]byte("›››"), 0, false, false, "???"},
+		{[]byte("‹›‹›"), 0, false, false, "????"},
+
+		// Markers with surrounding text.
+		{[]byte("before‹mid›after"), 0, false, false, "before?mid?after"},
+		{[]byte("a‹b›c‹d›e"), 0, false, false, "a?b?c?d?e"},
+
+		// Newline handling (breakNewLines=false, should not break).
 		{[]byte("‹abc›\n‹d\nef›"), len([]byte("‹abc›")), false, false, "‹abc›\n?d\nef?"},
+
+		// Newline handling (breakNewLines=true).
 		{[]byte("abc\n‹d\nef›\n \n\n "), len([]byte("abc")), true, false, "abc›\n‹?d›\n‹ef?›\n‹ ›\n\n‹ "},
 		{[]byte("abc\n‹d\nef›\n \n\n "), len([]byte("abc")), true, true, "abc›\n‹?d›\n‹ef?"},
 		{[]byte("‹abc› ‹def›"), len([]byte("‹abc› ")), true, true, "‹abc› ?def?"},
 		{[]byte("abc‹\ndef"), len([]byte("abc‹")), true, true, "abc\n‹def"},
+
+		// Multiple consecutive newlines with breakNewLines.
+		{[]byte("a\n\n\nb"), 0, true, false, "a›\n\n\n‹b"},
+		{[]byte("\nabc"), 0, true, false, "›\n‹abc"},
+
+		// Hash prefix escaping.
 		{[]byte("†abc"), 0, false, false, "?abc"},
 		{[]byte("‹†abc›"), 3, false, false, "‹?abc?"},
 		{[]byte("hello†world"), 0, false, false, "hello?world"},
+		{[]byte("†"), 0, false, false, "?"},
+		{[]byte("a†b†c"), 0, false, false, "a?b?c"},
+
+		// All three marker types together.
+		{[]byte("‹†›"), 0, false, false, "???"},
+
+		// Truncated lead byte at end of input (0xE2 without enough following bytes).
+		// 0xE2 alone at end — not a complete marker; it is copied as-is and an escape mark follows the trailing invalid UTF-8.
+		{[]byte("abc\xe2"), 0, false, false, "abc\xe2?"},
+		// 0xE2 0x80 at end — still not a complete marker.
+		{[]byte("abc\xe2\x80"), 0, false, false, "abc\xe2\x80?"},
+
+		// Non-ASCII text that is not a marker: either no lead byte at all, or
+		// lead byte 0xE2 with a wrong second byte (not 0x80). Both are valid UTF-8.
+		{[]byte("café"), 0, false, false, "café"},                   // é = 0xC3 0xA9, no lead byte
+		{[]byte("abc\xe2\x82\xac def"), 0, false, false, "abc€ def"}, // € = E2 82 AC, lead matches but mid doesn't
+
+		// Lead byte 0xE2 0x80 followed by non-marker third byte.
+		// U+2014 EM DASH = E2 80 94, shares lead+mid but third byte doesn't match.
+		{[]byte("hello\xe2\x80\x94world"), 0, false, false, "hello—world"},
+		// U+2026 ELLIPSIS = E2 80 A6.
+		{[]byte("wait\xe2\x80\xa6"), 0, false, false, "wait…"},
+
+		// Trailing invalid UTF-8 (RuneError) — single invalid byte at end.
+		{[]byte("abc\xff"), 0, false, false, "abc\xff?"},
+		// Invalid byte at end with no prior escaping needed.
+		{[]byte("hello\x80"), 0, false, false, "hello\x80?"},
+
+		// Invalid trailing byte combined with markers.
+		{[]byte("‹x›\xff"), 0, false, false, "?x?\xff?"},
+
+		// Strip mode.
+		{[]byte("abc \n"), 0, false, true, "abc"},
+		{[]byte("abc   "), 0, false, true, "abc"},
+		{[]byte("abc\n\n\n"), 0, false, true, "abc"},
+
+		// Start offset beyond input length.
+		{[]byte("abc"), 5, false, false, "abc"},
+
+		// Start offset at exact end.
+		{[]byte("abc"), 3, false, false, "abc"},
+
+		// Two adjacent marker pairs with start offset 0: all four markers are escaped.
+		{[]byte("‹abc›‹def›"), 0, false, false, "?abc??def?"},
+
+		// breakNewLines with markers and newlines interleaved.
+		{[]byte("‹a\nb›"), 0, true, false, "?a›\n‹b?"},
+		{[]byte("x\n‹y›\nz"), 0, true, false, "x›\n‹?y?›\n‹z"},
 	}
 
 	for _, tc := range testCases {