Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 55 additions & 1 deletion bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,61 @@ func BenchmarkRedact(b *testing.B) {
}
}

// benchAddr is a host/port pair used as a fixture by the SafeFormat
// benchmarks in this file.
type benchAddr struct {
	// Host is the host part of the address.
	Host string
	// Port is the numeric port of the address.
	Port int
}

// SafeFormat prints the address to w as "host:port". The formatting verb
// passed by the caller is ignored.
//
// The host is handed to Printf as-is while the port is wrapped in Safe;
// NOTE(review): presumably this marks only the port as safe from
// redaction — confirm against Safe's documentation.
func (a benchAddr) SafeFormat(w SafePrinter, _ rune) {
	host := a.Host
	port := Safe(a.Port)
	w.Printf("%s:%d", host, port)
}

// benchRequest is a request-like fixture that embeds a benchAddr value in
// its From field, so that formatting it exercises nested SafeFormat calls.
type benchRequest struct {
	// Method is the request method, e.g. "GET".
	Method string
	// Path is the request path.
	Path string
	// From is the address the request came from.
	From benchAddr
}

// SafeFormat prints the request to w as "<method> <path> from <addr>". The
// formatting verb passed by the caller is ignored.
//
// Only Method is wrapped in Safe; Path and From are handed to Printf
// unwrapped. From is formatted with %v.
func (r benchRequest) SafeFormat(w SafePrinter, _ rune) {
	method := Safe(r.Method)
	w.Printf("%s %s from %v", method, r.Path, r.From)
}

// BenchmarkSprintfWithSafeFormatter measures Sprintf and Sprint when the
// arguments are values whose types define a SafeFormat method. It runs four
// sub-benchmarks:
//
//   - single_struct: one benchAddr argument.
//   - nested_structs: one benchRequest, which itself contains a benchAddr.
//   - multiple_structs: two benchAddr values and one benchRequest.
//   - sprint_mixed: Sprint with struct values interleaved with plain strings.
func BenchmarkSprintfWithSafeFormatter(b *testing.B) {
	// Shared by every sub-benchmark except single_struct.
	request := benchRequest{
		Method: "GET",
		Path:   "/api/v1/users",
		From:   benchAddr{Host: "192.168.1.1", Port: 8080},
	}

	b.Run("single_struct", func(b *testing.B) {
		target := benchAddr{Host: "10.0.0.1", Port: 5432}
		for iter := 0; iter < b.N; iter++ {
			_ = Sprintf("connecting to %v", target)
		}
	})

	b.Run("nested_structs", func(b *testing.B) {
		for iter := 0; iter < b.N; iter++ {
			_ = Sprintf("received %v", request)
		}
	})

	b.Run("multiple_structs", func(b *testing.B) {
		from := benchAddr{Host: "10.0.0.1", Port: 3000}
		to := benchAddr{Host: "10.0.0.2", Port: 5432}
		for iter := 0; iter < b.N; iter++ {
			_ = Sprintf("proxy %v -> %v for %v", from, to, request)
		}
	})

	b.Run("sprint_mixed", func(b *testing.B) {
		via := benchAddr{Host: "10.0.0.1", Port: 5432}
		for iter := 0; iter < b.N; iter++ {
			_ = Sprint("request ", request, " via ", via, " user=", "alice")
		}
	})
}

// BenchmarkRedactCall_RegularRedaction calls .Redact() on a string with only
// regular ‹...› markers (no hash markers). This is the baseline.
func BenchmarkRedactCall_RegularRedaction(b *testing.B) {
Expand Down Expand Up @@ -57,4 +112,3 @@ func BenchmarkRedactCall_HashWithSalt(b *testing.B) {
_ = s.Redact()
}
}

123 changes: 81 additions & 42 deletions internal/escape/escape.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,23 @@ func InternalEscapeBytes(b []byte, startLoc int, breakNewLines, strip bool) (res
// Note: we use len(...RedactableS) and not len(...RedactableBytes)
// because the ...S variant is a compile-time constant so this
// accelerates the loops below.
start, ls := m.StartBytes, len(m.StartS)
end, le := m.EndBytes, len(m.EndS)
hashPrefix, lh := m.HashPrefixBytes, len(m.HashPrefixS)
start := m.StartBytes
ls := len(m.StartS)
end := m.EndBytes
le := len(m.EndS)
hashPrefix := m.HashPrefixBytes
lh := len(m.HashPrefixS)
escape := m.EscapeMarkBytes

// All markers share the same lead byte (0xE2) and second byte (0x80).
// This invariant is verified by the init() check in markers.go.
// We use this to skip over ASCII data quickly.
lead := start[0]
mid := start[1]
b2Start := start[2]
b2End := end[2]
b2Hash := hashPrefix[2]

// Trim final newlines/spaces, for convenience.
if strip {
end := len(b)
Expand All @@ -64,68 +76,95 @@ func InternalEscapeBytes(b []byte, startLoc int, breakNewLines, strip bool) (res
// already copied into res (if copied=true).
k := 0

for i := startLoc; i < len(b); i++ {
if breakNewLines && b[i] == '\n' {
for i := startLoc; i < len(b); {
// Use bytes.IndexByte to skip over runs of bytes that can't
// start a marker. The lead byte (0xE2) starts all marker
// sequences. When breakNewLines is false, we only need to find
// the lead byte. When true, we need to handle newlines too.
remaining := b[i:]
var idx int
if !breakNewLines {
idx = bytes.IndexByte(remaining, lead)
} else {
// Find the first byte that could be interesting: lead or newline.
// Use two IndexByte calls and take the minimum.
idxLead := bytes.IndexByte(remaining, lead)
idxNL := bytes.IndexByte(remaining, '\n')
if idxLead < 0 {
idx = idxNL
} else if idxNL < 0 {
idx = idxLead
} else if idxLead < idxNL {
idx = idxLead
} else {
idx = idxNL
}
}
if idx < 0 {
break
}
i += idx
c := b[i]

if breakNewLines && c == '\n' {
if !copied {
// We only allocate an output slice when we know we definitely
// need it.
res = make([]byte, 0, len(b))
copied = true
}
res = append(res, b[k:i]...)
// Either add an end marker, or elide a start marker immediately prior.

// Close the current redaction section before the newline.
// If the last thing we emitted was a start marker, remove
// it instead of producing an empty ‹› pair.
if bytes.HasSuffix(res, start) {
res = res[:len(res)-ls]
} else {
res = append(res, end...)
}
// Advance to the last newline character. We want to forward
// them all in a single call to doWrite, for performance.

// Emit all consecutive newlines as-is, outside any
// redaction envelope.
lastNewLine := i
for lastNewLine < len(b) && b[lastNewLine] == '\n' {
lastNewLine++
}
res = append(res, b[i:lastNewLine]...)

// Reopen the redaction section for content after the
// newline(s). The caller will emit the closing marker.
res = append(res, start...)
k = lastNewLine
i = lastNewLine - 1
} else
// Ensure that occurrences of the delimiter inside the string get
// escaped.
// Reminder: ls and le are likely greater than 1, as we are scanning
// utf-8 encoded delimiters (the utf-8 encoding is multibyte).
if i+ls <= len(b) && bytes.Equal(b[i:i+ls], start) {
if !copied {
// We only allocate an output slice when we know we definitely
// need it.
res = make([]byte, 0, len(b)+len(escape))
copied = true
}
res = append(res, b[k:i]...)
res = append(res, escape...)
// Advance the counters by the length (in bytes) of the delimiter.
k = i + ls
i += ls - 1 /* -1 because we have i++ at the end of every iteration */
} else if i+le <= len(b) && bytes.Equal(b[i:i+le], end) {
if !copied {
// See the comment above about res allocation.
res = make([]byte, 0, len(b)+len(escape))
copied = true
}
res = append(res, b[k:i]...)
res = append(res, escape...)
// Advance the counters by the length (in bytes) of the delimiter.
k = i + le
i += le - 1 /* -1 because we have i++ at the end of every iteration */
} else if i+lh <= len(b) && bytes.Equal(b[i:i+lh], hashPrefix) {
i = lastNewLine
continue
}

// c == lead (0xE2). Check if we have a full marker.
if i+2 >= len(b) || b[i+1] != mid {
i++
continue
}

b2 := b[i+2]
markerLen := 0
if b2 == b2Start {
markerLen = ls
} else if b2 == b2End {
markerLen = le
} else if b2 == b2Hash {
markerLen = lh
}

if markerLen > 0 {
if !copied {
res = make([]byte, 0, len(b)+len(escape))
copied = true
}
res = append(res, b[k:i]...)
res = append(res, escape...)
k = i + lh
i += lh - 1
k = i + markerLen
i += markerLen
} else {
i++
}
}
// If the string terminates with an invalid utf-8 sequence, we
Expand Down
73 changes: 73 additions & 0 deletions internal/escape/escape_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,95 @@ func TestInternalEscape(t *testing.T) {
strip bool
expected string
}{
// Empty / nil inputs.
{nil, 0, false, false, ""},
{[]byte(""), 0, false, false, ""},

// Pure ASCII, no markers.
{[]byte("abc"), 0, false, false, "abc"},
{[]byte("hello world 12345"), 0, false, false, "hello world 12345"},

// Start marker escaping.
{[]byte("‹abc›"), 0, false, false, "?abc?"},
{[]byte("‹abc›"), 3, false, false, "‹abc?"},
{[]byte("‹abc›def›ghi"), 3, false, false, "‹abc?def?ghi"},
{[]byte("‹abc›"), len([]byte("‹abc›")), false, false, "‹abc›"},
{[]byte("‹abc›‹def›"), len([]byte("‹abc›")), false, false, "‹abc›?def?"},

// Multiple markers in sequence.
{[]byte("‹‹‹"), 0, false, false, "???"},
{[]byte("›››"), 0, false, false, "???"},
{[]byte("‹›‹›"), 0, false, false, "????"},

// Markers with surrounding text.
{[]byte("before‹mid›after"), 0, false, false, "before?mid?after"},
{[]byte("a‹b›c‹d›e"), 0, false, false, "a?b?c?d?e"},

// Newline handling (breakNewLines=false, should not break).
{[]byte("‹abc›\n‹d\nef›"), len([]byte("‹abc›")), false, false, "‹abc›\n?d\nef?"},

// Newline handling (breakNewLines=true).
{[]byte("abc\n‹d\nef›\n \n\n "), len([]byte("abc")), true, false, "abc›\n‹?d›\n‹ef?›\n‹ ›\n\n‹ "},
{[]byte("abc\n‹d\nef›\n \n\n "), len([]byte("abc")), true, true, "abc›\n‹?d›\n‹ef?"},
{[]byte("‹abc› ‹def›"), len([]byte("‹abc› ")), true, true, "‹abc› ?def?"},
{[]byte("abc‹\ndef"), len([]byte("abc‹")), true, true, "abc\n‹def"},

// Multiple consecutive newlines with breakNewLines.
{[]byte("a\n\n\nb"), 0, true, false, "a›\n\n\n‹b"},
{[]byte("\nabc"), 0, true, false, "›\n‹abc"},

// Hash prefix escaping.
{[]byte("†abc"), 0, false, false, "?abc"},
{[]byte("‹†abc›"), 3, false, false, "‹?abc?"},
{[]byte("hello†world"), 0, false, false, "hello?world"},
{[]byte("†"), 0, false, false, "?"},
{[]byte("a†b†c"), 0, false, false, "a?b?c"},

// All three marker types together.
{[]byte("‹†›"), 0, false, false, "???"},

// Truncated lead byte at end of input (0xE2 without enough following bytes).
// 0xE2 alone at end — not a complete marker, so the byte itself is kept; the trailing "?" matches the trailing-invalid-UTF-8 cases below.
{[]byte("abc\xe2"), 0, false, false, "abc\xe2?"},
// 0xE2 0x80 at end — still not a complete marker.
{[]byte("abc\xe2\x80"), 0, false, false, "abc\xe2\x80?"},

// Lead byte 0xE2 with wrong second byte (not 0x80).
// This is a valid UTF-8 sequence but not a marker.
{[]byte("café"), 0, false, false, "café"}, // é = 0xC3 0xA9, no lead byte
{[]byte("abc\xe2\x82\xac def"), 0, false, false, "abc€ def"}, // € = E2 82 AC, lead matches but mid doesn't

// Lead byte 0xE2 0x80 followed by non-marker third byte.
// U+2014 EM DASH = E2 80 94, shares lead+mid but third byte doesn't match.
{[]byte("hello\xe2\x80\x94world"), 0, false, false, "hello—world"},
// U+2026 ELLIPSIS = E2 80 A6.
{[]byte("wait\xe2\x80\xa6"), 0, false, false, "wait…"},

// Trailing invalid UTF-8 (RuneError) — single invalid byte at end.
{[]byte("abc\xff"), 0, false, false, "abc\xff?"},
// Invalid byte at end with no prior escaping needed.
{[]byte("hello\x80"), 0, false, false, "hello\x80?"},

// Invalid trailing byte combined with markers.
{[]byte("‹x›\xff"), 0, false, false, "?x?\xff?"},

// Strip mode.
{[]byte("abc \n"), 0, false, true, "abc"},
{[]byte("abc "), 0, false, true, "abc"},
{[]byte("abc\n\n\n"), 0, false, true, "abc"},

// Start offset beyond input length.
{[]byte("abc"), 5, false, false, "abc"},

// Start offset at exact end.
{[]byte("abc"), 3, false, false, "abc"},

// Markers only after start offset.
{[]byte("‹abc›‹def›"), 0, false, false, "?abc??def?"},

// breakNewLines with markers and newlines interleaved.
{[]byte("‹a\nb›"), 0, true, false, "?a›\n‹b?"},
{[]byte("x\n‹y›\nz"), 0, true, false, "x›\n‹?y?›\n‹z"},
}

for _, tc := range testCases {
Expand Down
Loading