From 86b48717baa91364d9423ee80c649b9e95e09461 Mon Sep 17 00:00:00 2001 From: Daniel Duan Date: Fri, 6 Feb 2026 10:40:30 -0800 Subject: [PATCH 01/14] Gate quote scanning by head byte Avoid triple-quote probing on non-quote tokens in Parser.nextToken.scanString.\n\nThe tokenizer now branches on the head code unit before running single-quote or double-quote string logic, keeping existing parse behavior while removing repeated quote checks for numeric/date/bare-token paths.\n\nAlso cache range.upperBound in a local for scan loops and trim date/value range checks to use that cached bound. --- Sources/TOMLDecoder/Parsing/Parser.swift | 176 +++++++++++------------ 1 file changed, 83 insertions(+), 93 deletions(-) diff --git a/Sources/TOMLDecoder/Parsing/Parser.swift b/Sources/TOMLDecoder/Parsing/Parser.swift index 49e9353..532185f 100644 --- a/Sources/TOMLDecoder/Parsing/Parser.swift +++ b/Sources/TOMLDecoder/Parsing/Parser.swift @@ -149,13 +149,14 @@ struct Parser: ~Copyable { let isBareKeyChar = CodeUnits.isBareKeyChar let isValueChar = CodeUnits.isValueChar let start = range.lowerBound + let end = range.upperBound let head = bytes[start] if (head >= CodeUnits.lowerA && head <= CodeUnits.lowerZ) || (head >= CodeUnits.upperA && head <= CodeUnits.upperZ) || head == CodeUnits.underscore { var index = start + 1 - while index < range.upperBound { + while index < end { let ch = bytes[index] if isBareKeyChar[Int(ch)] { index += 1 @@ -167,88 +168,42 @@ struct Parser: ~Copyable { return } - if start + 3 <= range.upperBound, - bytes[start] == CodeUnits.singleQuote, - bytes[start + 1] == CodeUnits.singleQuote, - bytes[start + 2] == CodeUnits.singleQuote - { - var i = start + 3 - var newlinesInToken = 0 + if head == CodeUnits.singleQuote { + if start + 3 <= end, + bytes[start + 1] == CodeUnits.singleQuote, + bytes[start + 2] == CodeUnits.singleQuote + { + var i = start + 3 + var newlinesInToken = 0 - while i < range.upperBound { - if bytes[i] == CodeUnits.lf { - newlinesInToken += 
1 - } - if i + 3 <= range.upperBound, - bytes[i] == CodeUnits.singleQuote, - bytes[i + 1] == CodeUnits.singleQuote, - bytes[i + 2] == CodeUnits.singleQuote - { - if i + 3 >= range.upperBound || bytes[i + 3] != CodeUnits.singleQuote { - break + while i < end { + if bytes[i] == CodeUnits.lf { + newlinesInToken += 1 } - } - i += 1 - } - - guard i < range.upperBound else { - throw TOMLError( - .syntax(lineNumber: lineNumber, message: "unterminated triple-s-quote") - ) - } - - let end = i + 3 - emitToken(kind: .string, start: start, end: end, newlines: newlinesInToken) - return - } - - if start + 3 < range.upperBound, - bytes[start] == CodeUnits.doubleQuote, - bytes[start + 1] == CodeUnits.doubleQuote, - bytes[start + 2] == CodeUnits.doubleQuote - { - var i = start + 3 - let textCount = range.upperBound - var newlinesInToken = 0 - - while i < textCount { - if bytes[i] == CodeUnits.lf { - newlinesInToken += 1 - } - if i + 3 <= textCount, - bytes[i] == CodeUnits.doubleQuote, - bytes[i + 1] == CodeUnits.doubleQuote, - bytes[i + 2] == CodeUnits.doubleQuote - { - // Check if this is exactly 3 quotes (not part of a longer sequence) - if i + 3 >= textCount || bytes[i + 3] != CodeUnits.doubleQuote { - if bytes[i - 1] == CodeUnits.backslash { - i += 1 - continue + if i + 3 <= end, + bytes[i] == CodeUnits.singleQuote, + bytes[i + 1] == CodeUnits.singleQuote, + bytes[i + 2] == CodeUnits.singleQuote + { + if i + 3 >= end || bytes[i + 3] != CodeUnits.singleQuote { + break } - break } + i += 1 } - i += 1 - } - guard i < range.upperBound else { - throw TOMLError( - .syntax(lineNumber: lineNumber, message: "unterminated triple-d-quote") - ) - } + guard i < end else { + throw TOMLError( + .syntax(lineNumber: lineNumber, message: "unterminated triple-s-quote") + ) + } - let end = i + 3 - emitToken(kind: .string, start: start, end: end, newlines: newlinesInToken) - return - } + emitToken(kind: .string, start: start, end: i + 3, newlines: newlinesInToken) + return + } - let ch = 
bytes[start] - if ch == CodeUnits.singleQuote { var i = start + 1 - let textCount = range.upperBound - - while i < textCount { + while i < end { let ch = bytes[i] if ch == CodeUnits.singleQuote || ch == CodeUnits.lf { break @@ -256,7 +211,7 @@ struct Parser: ~Copyable { i += 1 } - if i >= textCount || bytes[i] != CodeUnits.singleQuote { + if i >= end || bytes[i] != CodeUnits.singleQuote { throw TOMLError( .syntax(lineNumber: lineNumber, message: "unterminated s-quote") ) @@ -266,11 +221,48 @@ struct Parser: ~Copyable { return } - if ch == CodeUnits.doubleQuote { + if head == CodeUnits.doubleQuote { + if start + 3 < end, + bytes[start + 1] == CodeUnits.doubleQuote, + bytes[start + 2] == CodeUnits.doubleQuote + { + var i = start + 3 + var newlinesInToken = 0 + + while i < end { + if bytes[i] == CodeUnits.lf { + newlinesInToken += 1 + } + if i + 3 <= end, + bytes[i] == CodeUnits.doubleQuote, + bytes[i + 1] == CodeUnits.doubleQuote, + bytes[i + 2] == CodeUnits.doubleQuote + { + if i + 3 >= end || bytes[i + 3] != CodeUnits.doubleQuote { + if bytes[i - 1] == CodeUnits.backslash { + i += 1 + continue + } + break + } + } + i += 1 + } + + guard i < end else { + throw TOMLError( + .syntax(lineNumber: lineNumber, message: "unterminated triple-d-quote") + ) + } + + emitToken(kind: .string, start: start, end: i + 3, newlines: newlinesInToken) + return + } + var i = start + 1 // 8x unrolling for double-quoted strings - while i + 8 <= range.upperBound { + while i + 8 <= end { if bytes[i] == CodeUnits.backslash || bytes[i] == CodeUnits.doubleQuote || bytes[i] == CodeUnits.lf { break } if bytes[i + 1] == CodeUnits.backslash || bytes[i + 1] == CodeUnits.doubleQuote || bytes[i + 1] == CodeUnits.lf { break } if bytes[i + 2] == CodeUnits.backslash || bytes[i + 2] == CodeUnits.doubleQuote || bytes[i + 2] == CodeUnits.lf { break } @@ -282,11 +274,11 @@ struct Parser: ~Copyable { i += 8 } - while i < range.upperBound { + while i < end { let ch = bytes[i] if ch == CodeUnits.backslash { i 
+= 1 - if i < range.upperBound { + if i < end { i += 1 continue } @@ -299,7 +291,7 @@ struct Parser: ~Copyable { i += 1 } - if i >= range.upperBound || bytes[i] != CodeUnits.doubleQuote { + if i >= end || bytes[i] != CodeUnits.doubleQuote { throw TOMLError( .syntax(lineNumber: lineNumber, message: "unterminated quote") ) @@ -313,34 +305,34 @@ struct Parser: ~Copyable { var index = start var dateEnder: Int? // Fast path: Dates must produce YYYY-MM-DD, so checks for the dash - if start + 4 < range.upperBound && bytes[start + 4] == CodeUnits.minus { + if start + 4 < end && bytes[start + 4] == CodeUnits.minus { dateEnder = scanDate(bytes: bytes, range: range)?.3 } - if let dateEnder, dateEnder < range.upperBound, + if let dateEnder, dateEnder < end, bytes[dateEnder] == CodeUnits.upperT || bytes[dateEnder] == CodeUnits.lowerT || bytes[dateEnder] == CodeUnits.space { let timeStarter = dateEnder + 1 if let timeEnder = scanTime( - bytes: bytes, range: timeStarter ..< range.upperBound + bytes: bytes, range: timeStarter ..< end )?.3 { index = timeEnder } } else if let dateEnder { index = dateEnder - } else if start + 2 < range.upperBound, bytes[start + 2] == CodeUnits.colon, + } else if start + 2 < end, bytes[start + 2] == CodeUnits.colon, let timeEnder = scanTime( - bytes: bytes, range: start ..< range.upperBound + bytes: bytes, range: start ..< end )?.3 { index = timeEnder } if index > start { - if index < range.upperBound { + if index < end { if bytes[index] == CodeUnits.dot { index += 1 - while index < range.upperBound, bytes[index] >= CodeUnits.number0, + while index < end, bytes[index] >= CodeUnits.number0, bytes[index] <= CodeUnits.number9 { index += 1 @@ -349,15 +341,13 @@ struct Parser: ~Copyable { if bytes[index] == CodeUnits.upperZ || bytes[index] == CodeUnits.lowerZ { index += 1 } else if let timzoneEnder = scanTimezoneOffset( - bytes: bytes, range: index ..< range.upperBound + bytes: bytes, range: index ..< end ) { index = timzoneEnder } } // squeeze out any 
spaces at end of string - while index >= start, - bytes[index - 1] == CodeUnits.space - { + while index > start, bytes[index - 1] == CodeUnits.space { index -= 1 } // tokenize @@ -369,7 +359,7 @@ struct Parser: ~Copyable { if isDotSpecial { var index = start var isValidKey = true - while index < range.upperBound { + while index < end { let ch = bytes[index] if isBareKeyChar[Int(ch)] { index += 1 @@ -385,7 +375,7 @@ struct Parser: ~Copyable { emitToken(kind: isValidKey ? .bareKey : .string, start: start, end: index) } else { var index = start - while index < range.upperBound { + while index < end { let ch = bytes[index] if isValueChar[Int(ch)] { index += 1 From fb266c72fcdc6ca1c5ad9d13c5e9c3c4f09bacea Mon Sep 17 00:00:00 2001 From: Daniel Duan Date: Fri, 6 Feb 2026 10:46:31 -0800 Subject: [PATCH 02/14] Avoid staging terminal selector segment Reduce parseSelect path churn by storing only selector prefixes in tablePath. fillTablePath now returns the terminal key/hash/token directly instead of appending then removeLast. This removes one append/remove pair per selector while preserving walkTablePath semantics and selector error handling. 
--- Sources/TOMLDecoder/Parsing/Parser.swift | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Sources/TOMLDecoder/Parsing/Parser.swift b/Sources/TOMLDecoder/Parsing/Parser.swift index 532185f..0a178b3 100644 --- a/Sources/TOMLDecoder/Parsing/Parser.swift +++ b/Sources/TOMLDecoder/Parsing/Parser.swift @@ -4,7 +4,7 @@ struct Parser: ~Copyable { var currentLineNumber = 1 var currentTable = 0 var currentTableIsKeyed = false - var tablePath: [(key: String, keyHash: Int, token: Token)] = [] + var tablePath: [(key: String, keyHash: Int)] = [] var tables: [InternalTOMLTable] = [InternalTOMLTable()] var arrays: [InternalTOMLArray] = [] var keyTables: [KeyTablePair] = [] @@ -877,7 +877,11 @@ struct Parser: ~Copyable { throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "syntax error")) } - mutating func fillTablePath(bytes: UnsafeBufferPointer) throws(TOMLError) { + mutating func fillTablePath(bytes: UnsafeBufferPointer) throws(TOMLError) -> ( + key: String, + keyHash: Int, + token: Token + ) { let lineNumber = token.lineNumber tablePath.removeAll(keepingCapacity: true) @@ -886,23 +890,22 @@ struct Parser: ~Copyable { throw TOMLError(.syntax(lineNumber: lineNumber, message: "invalid or missing key")) } + let currentToken = token let (key, keyHash) = try normalizeKeyAndHash(bytes: bytes, token: token, keyTransform: keyTransform) - tablePath.append((key: key, keyHash: keyHash, token: token)) try nextToken(bytes: bytes, isDotSpecial: true) if token.kind == .rbracket { - break + return (key: key, keyHash: keyHash, token: currentToken) } + tablePath.append((key: key, keyHash: keyHash)) + if token.kind != .dot { throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "invalid key")) } try nextToken(bytes: bytes, isDotSpecial: true) } - if tablePath.isEmpty { - throw TOMLError(.syntax(lineNumber: lineNumber, message: "empty table selector")) - } } mutating func parseSelect(bytes: UnsafeBufferPointer) throws(TOMLError) { 
@@ -919,10 +922,7 @@ struct Parser: ~Copyable { try eatToken(bytes: bytes, kind: .lbracket, isDotSpecial: true) } - try fillTablePath(bytes: bytes) - - // For [x.y.z] or [[x.y.z]], remove z from tpath. - let (lastKey, lastKeyHash, z) = tablePath.removeLast() + let (lastKey, lastKeyHash, z) = try fillTablePath(bytes: bytes) try walkTablePath() if !llb { @@ -2032,7 +2032,7 @@ extension Parser { mutating func walkTablePath() throws(TOMLError) { var tableIndex = 0 var isKeyed = false - for (key, keyHash, _) in tablePath { + for (key, keyHash) in tablePath { switch tableValue(tableIndex: tableIndex, keyed: isKeyed, key: key, keyHash: keyHash) { case let .table(index): tableIndex = index From 2ea9a1068eb1233acd88708cd45d48f1e2528441 Mon Sep 17 00:00:00 2001 From: Daniel Duan Date: Fri, 6 Feb 2026 10:52:09 -0800 Subject: [PATCH 03/14] Use lookup table in basic string scan Add CodeUnits.isBasicStringBodyChar and use it in nextToken's double-quoted string scanner. This replaces repeated per-byte backslash/quote/newline comparisons with one table probe in both the unrolled and scalar loops. 
--- Sources/TOMLDecoder/Parsing/Constants.swift | 9 +++++++ Sources/TOMLDecoder/Parsing/Parser.swift | 26 +++++++++++---------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/Sources/TOMLDecoder/Parsing/Constants.swift b/Sources/TOMLDecoder/Parsing/Constants.swift index f5b2261..ee789e5 100644 --- a/Sources/TOMLDecoder/Parsing/Constants.swift +++ b/Sources/TOMLDecoder/Parsing/Constants.swift @@ -84,6 +84,15 @@ enum CodeUnits { return UnsafePointer(ptr) }() + nonisolated(unsafe) static let isBasicStringBodyChar: UnsafePointer = { + let ptr = UnsafeMutablePointer.allocate(capacity: 256) + ptr.initialize(repeating: true, count: 256) + ptr[Int(CodeUnits.backslash)] = false + ptr[Int(CodeUnits.doubleQuote)] = false + ptr[Int(CodeUnits.lf)] = false + return UnsafePointer(ptr) + }() + static let null: UTF8.CodeUnit = 0x00 static let unitSeparator: UTF8.CodeUnit = 0x1F static let delete: UTF8.CodeUnit = 0x7F diff --git a/Sources/TOMLDecoder/Parsing/Parser.swift b/Sources/TOMLDecoder/Parsing/Parser.swift index 0a178b3..7a37750 100644 --- a/Sources/TOMLDecoder/Parsing/Parser.swift +++ b/Sources/TOMLDecoder/Parsing/Parser.swift @@ -148,6 +148,7 @@ struct Parser: ~Copyable { func scanString(range: Range, lineNumber: Int) throws(TOMLError) { let isBareKeyChar = CodeUnits.isBareKeyChar let isValueChar = CodeUnits.isValueChar + let isBasicStringBodyChar = CodeUnits.isBasicStringBodyChar let start = range.lowerBound let end = range.upperBound let head = bytes[start] @@ -263,19 +264,23 @@ struct Parser: ~Copyable { // 8x unrolling for double-quoted strings while i + 8 <= end { - if bytes[i] == CodeUnits.backslash || bytes[i] == CodeUnits.doubleQuote || bytes[i] == CodeUnits.lf { break } - if bytes[i + 1] == CodeUnits.backslash || bytes[i + 1] == CodeUnits.doubleQuote || bytes[i + 1] == CodeUnits.lf { break } - if bytes[i + 2] == CodeUnits.backslash || bytes[i + 2] == CodeUnits.doubleQuote || bytes[i + 2] == CodeUnits.lf { break } - if bytes[i + 3] == 
CodeUnits.backslash || bytes[i + 3] == CodeUnits.doubleQuote || bytes[i + 3] == CodeUnits.lf { break } - if bytes[i + 4] == CodeUnits.backslash || bytes[i + 4] == CodeUnits.doubleQuote || bytes[i + 4] == CodeUnits.lf { break } - if bytes[i + 5] == CodeUnits.backslash || bytes[i + 5] == CodeUnits.doubleQuote || bytes[i + 5] == CodeUnits.lf { break } - if bytes[i + 6] == CodeUnits.backslash || bytes[i + 6] == CodeUnits.doubleQuote || bytes[i + 6] == CodeUnits.lf { break } - if bytes[i + 7] == CodeUnits.backslash || bytes[i + 7] == CodeUnits.doubleQuote || bytes[i + 7] == CodeUnits.lf { break } + if !isBasicStringBodyChar[Int(bytes[i])] { break } + if !isBasicStringBodyChar[Int(bytes[i + 1])] { break } + if !isBasicStringBodyChar[Int(bytes[i + 2])] { break } + if !isBasicStringBodyChar[Int(bytes[i + 3])] { break } + if !isBasicStringBodyChar[Int(bytes[i + 4])] { break } + if !isBasicStringBodyChar[Int(bytes[i + 5])] { break } + if !isBasicStringBodyChar[Int(bytes[i + 6])] { break } + if !isBasicStringBodyChar[Int(bytes[i + 7])] { break } i += 8 } while i < end { let ch = bytes[i] + if isBasicStringBodyChar[Int(ch)] { + i += 1 + continue + } if ch == CodeUnits.backslash { i += 1 if i < end { @@ -285,10 +290,7 @@ struct Parser: ~Copyable { break } - if ch == CodeUnits.lf || ch == CodeUnits.doubleQuote { - break - } - i += 1 + break } if i >= end || bytes[i] != CodeUnits.doubleQuote { From 8b1c33a0e67936d0e0f6ecabb50d319b59b065aa Mon Sep 17 00:00:00 2001 From: Daniel Duan Date: Fri, 6 Feb 2026 11:07:51 -0800 Subject: [PATCH 04/14] Cache matcher index counts Hoist indices.count out of the inner loops in matchKeyValue, matchKeyArray, and matchKeyTable. This keeps lookup behavior unchanged while removing repeated count reads in hot matcher loops. 
--- Sources/TOMLDecoder/Parsing/Parser.swift | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Sources/TOMLDecoder/Parsing/Parser.swift b/Sources/TOMLDecoder/Parsing/Parser.swift index 7a37750..65e1a32 100644 --- a/Sources/TOMLDecoder/Parsing/Parser.swift +++ b/Sources/TOMLDecoder/Parsing/Parser.swift @@ -1921,8 +1921,9 @@ extension Parser { guard let keyValueBase = keyValueBuffer.baseAddress else { return nil } + let indexCount = indices.count var i = 0 - while i < indices.count { + while i < indexCount { let keyValueIndex = indices[i] let keyValuePair = keyValueBase.advanced(by: keyValueIndex).pointee if keyValuePair.keyHash == keyHash, keyValuePair.key == key { @@ -1943,8 +1944,9 @@ extension Parser { guard let keyArrayBase = keyArrayBuffer.baseAddress else { return nil } + let indexCount = indices.count var i = 0 - while i < indices.count { + while i < indexCount { let keyArrayIndex = indices[i] let keyArrayPair = keyArrayBase.advanced(by: keyArrayIndex).pointee if keyArrayPair.keyHash == keyHash, keyArrayPair.key == key { @@ -1965,8 +1967,9 @@ extension Parser { guard let keyTableBase = keyTableBuffer.baseAddress else { return nil } + let indexCount = indices.count var i = 0 - while i < indices.count { + while i < indexCount { let keyTableIndex = indices[i] let keyTablePair = keyTableBase.advanced(by: keyTableIndex).pointee if keyTablePair.keyHash == keyHash, keyTablePair.key == key { From fcd20ade306d0e3c6875fce0ea4470c28c34524e Mon Sep 17 00:00:00 2001 From: Daniel Duan Date: Fri, 6 Feb 2026 13:35:23 -0800 Subject: [PATCH 05/14] Sample long key hash Replace long-key byte-by-byte FNV hashing with constant-time prefix/suffix sampling in fastKeyHash(bytes:range:) and fastKeyHash(_:). Keep <=8-byte packed hashing unchanged. Preserve exact-key equality checks so hash remains a prefilter only. 
--- Sources/TOMLDecoder/Parsing/Parser.swift | 33 ++++++++++++++---------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/Sources/TOMLDecoder/Parsing/Parser.swift b/Sources/TOMLDecoder/Parsing/Parser.swift index 65e1a32..70ae710 100644 --- a/Sources/TOMLDecoder/Parsing/Parser.swift +++ b/Sources/TOMLDecoder/Parsing/Parser.swift @@ -1816,15 +1816,17 @@ func normalizeKeyAndHash(bytes: UnsafeBufferPointer, token: Token, keyTra @inline(__always) func fastKeyHash(bytes: UnsafeBufferPointer, range: Range) -> Int { - let offsetBasis: UInt64 = 14_695_981_039_346_656_037 - let prime: UInt64 = 1_099_511_628_211 - let count = range.upperBound - range.lowerBound - if count <= 8, let base = bytes.baseAddress { + if let base = bytes.baseAddress { let start = base.advanced(by: range.lowerBound) - return Int(truncatingIfNeeded: packedKeyHash(UnsafeBufferPointer(start: start, count: count))) + if count <= 8 { + return Int(truncatingIfNeeded: packedKeyHash(UnsafeBufferPointer(start: start, count: count))) + } + return Int(truncatingIfNeeded: sampledKeyHash(start: start, count: count)) } + let offsetBasis: UInt64 = 14_695_981_039_346_656_037 + let prime: UInt64 = 1_099_511_628_211 var hash = offsetBasis var index = range.lowerBound while index < range.upperBound { @@ -1837,23 +1839,20 @@ func fastKeyHash(bytes: UnsafeBufferPointer, range: Range) -> Int { @inline(__always) func fastKeyHash(_ key: String) -> Int { - let offsetBasis: UInt64 = 14_695_981_039_346_656_037 - let prime: UInt64 = 1_099_511_628_211 - if let hash = key.utf8.withContiguousStorageIfAvailable({ buffer -> UInt64 in if buffer.count <= 8 { return packedKeyHash(buffer) } - var hash = offsetBasis - for byte in buffer { - hash ^= UInt64(byte) - hash &*= prime + if let start = buffer.baseAddress { + return sampledKeyHash(start: start, count: buffer.count) } - return hash + return 0 }) { return Int(truncatingIfNeeded: hash) } + let offsetBasis: UInt64 = 14_695_981_039_346_656_037 + let prime: UInt64 = 
1_099_511_628_211 var hash = offsetBasis var packed: UInt64 = 0 var count = 0 @@ -1868,6 +1867,14 @@ func fastKeyHash(_ key: String) -> Int { return Int(truncatingIfNeeded: count <= 8 ? packed : hash) } +@inline(__always) +private func sampledKeyHash(start: UnsafePointer, count: Int) -> UInt64 { + let prefix = packedKeyHash(UnsafeBufferPointer(start: start, count: 8)) + let suffix = packedKeyHash(UnsafeBufferPointer(start: start.advanced(by: count - 8), count: 8)) + let rotatedSuffix = (suffix << 1) | (suffix >> 63) + return prefix ^ rotatedSuffix ^ (UInt64(truncatingIfNeeded: count) &* 0x9E37_79B1_85EB_CA87) +} + @inline(__always) private func packedKeyHash(_ buffer: UnsafeBufferPointer) -> UInt64 { var packed: UInt64 = 0 From 14fbb25d50f70f4366b31717c187be3d7241c42c Mon Sep 17 00:00:00 2001 From: Daniel Duan Date: Fri, 6 Feb 2026 13:45:42 -0800 Subject: [PATCH 06/14] Inline key-value token write Change createKeyValue to accept the parsed value token and append KeyValuePair with its final value in one step. This removes temporary Token.empty staging and follow-up indexed mutation in parseKeyValue while preserving lookup and insertion behavior. 
--- Sources/TOMLDecoder/Parsing/Parser.swift | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/Sources/TOMLDecoder/Parsing/Parser.swift b/Sources/TOMLDecoder/Parsing/Parser.swift index 70ae710..dbc7b20 100644 --- a/Sources/TOMLDecoder/Parsing/Parser.swift +++ b/Sources/TOMLDecoder/Parsing/Parser.swift @@ -405,14 +405,19 @@ struct Parser: ~Copyable { try nextToken(bytes: bytes, isDotSpecial: isDotSpecial) } - mutating func createKeyValue(bytes: UnsafeBufferPointer, token: Token, inTable tableIndex: Int, isKeyed: Bool) throws(TOMLError) -> Int { + mutating func createKeyValue( + bytes: UnsafeBufferPointer, + token: Token, + value: Token, + inTable tableIndex: Int, + isKeyed: Bool + ) throws(TOMLError) { let (key, keyHash) = try normalizeKeyAndHash(bytes: bytes, token: token, keyTransform: keyTransform) if tableValue(tableIndex: tableIndex, keyed: isKeyed, key: key, keyHash: keyHash) != nil { throw TOMLError(.badKey(lineNumber: token.lineNumber)) } - let kv = KeyValuePair(key: key, keyHash: keyHash, value: Token.empty) let index = keyValues.count - keyValues.append(kv) + keyValues.append(KeyValuePair(key: key, keyHash: keyHash, value: value)) if isKeyed { if keyTables[tableIndex].table.keyValues.isEmpty { @@ -425,7 +430,6 @@ struct Parser: ~Copyable { } tables[tableIndex].keyValues.append(index) } - return index } mutating func createKeyTable(bytes: UnsafeBufferPointer, token: Token, inTable tableIndex: Int, isKeyed: Bool, implicit: Bool = false) throws(TOMLError) -> Int { @@ -857,9 +861,7 @@ struct Parser: ~Copyable { try nextToken(bytes: bytes, isDotSpecial: false) if token.kind == .string || token.kind == .bareKey { - let index = try createKeyValue(bytes: bytes, token: key, inTable: tableIndex, isKeyed: isKeyed) - let value = token - keyValues[index].value = value + try createKeyValue(bytes: bytes, token: key, value: token, inTable: tableIndex, isKeyed: isKeyed) try nextToken(bytes: bytes, isDotSpecial: false) return } From 
8c6f0ca86fffacbaecf70294ffa62ea674a2f341 Mon Sep 17 00:00:00 2001 From: Daniel Duan Date: Fri, 6 Feb 2026 13:56:03 -0800 Subject: [PATCH 07/14] Fast-path single-segment selectors Parse the first selector segment directly in parseSelect and branch immediately on closing bracket vs dot. Single-segment selectors now skip tablePath bookkeeping and walkTablePath entirely, while dotted selectors still use the existing path fill/walk logic after seeding the first segment. This removes speculative token rollback from prior attempts and keeps key ownership behavior aligned with existing dotted-path code. --- Sources/TOMLDecoder/Parsing/Parser.swift | 48 +++++++++++++++++++++--- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/Sources/TOMLDecoder/Parsing/Parser.swift b/Sources/TOMLDecoder/Parsing/Parser.swift index dbc7b20..66fc5cc 100644 --- a/Sources/TOMLDecoder/Parsing/Parser.swift +++ b/Sources/TOMLDecoder/Parsing/Parser.swift @@ -881,13 +881,15 @@ struct Parser: ~Copyable { throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "syntax error")) } - mutating func fillTablePath(bytes: UnsafeBufferPointer) throws(TOMLError) -> ( + mutating func fillTablePath(bytes: UnsafeBufferPointer, clearPath: Bool = true) throws(TOMLError) -> ( key: String, keyHash: Int, token: Token ) { let lineNumber = token.lineNumber - tablePath.removeAll(keepingCapacity: true) + if clearPath { + tablePath.removeAll(keepingCapacity: true) + } while true { if token.kind != .string, token.kind != .bareKey { @@ -926,15 +928,49 @@ struct Parser: ~Copyable { try eatToken(bytes: bytes, kind: .lbracket, isDotSpecial: true) } - let (lastKey, lastKeyHash, z) = try fillTablePath(bytes: bytes) - try walkTablePath() + if token.kind != .string, token.kind != .bareKey { + throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "invalid or missing key")) + } + + let firstToken = token + let (firstKey, firstKeyHash) = try normalizeKeyAndHash( + bytes: bytes, + token: token, + 
keyTransform: keyTransform + ) + try nextToken(bytes: bytes, isDotSpecial: true) + + let lastKey: String + let lastKeyHash: Int + let lastKeyToken: Token + if token.kind == .rbracket { + lastKey = firstKey + lastKeyHash = firstKeyHash + lastKeyToken = firstToken + currentTable = 0 + currentTableIsKeyed = false + } else { + if token.kind != .dot { + throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "invalid key")) + } + + tablePath.removeAll(keepingCapacity: true) + tablePath.append((key: firstKey, keyHash: firstKeyHash)) + try nextToken(bytes: bytes, isDotSpecial: true) + + let (key, keyHash, keyToken) = try fillTablePath(bytes: bytes, clearPath: false) + lastKey = key + lastKeyHash = keyHash + lastKeyToken = keyToken + try walkTablePath() + } if !llb { // [x.y.z] -> create z = {} in x.y currentTable = try createKeyTable( normalizedKey: lastKey, keyHash: lastKeyHash, - lineNumber: z.lineNumber, + lineNumber: lastKeyToken.lineNumber, inTable: currentTable, isKeyed: currentTableIsKeyed ) @@ -946,7 +982,7 @@ struct Parser: ~Copyable { maybeArrayIndex = try createKeyArray( normalizedKey: lastKey, keyHash: lastKeyHash, - lineNumber: z.lineNumber, + lineNumber: lastKeyToken.lineNumber, inTable: currentTable, isKeyed: currentTableIsKeyed, kind: .table From 0f8ddd3d0671ef4328b6ccfd534a47f1c3f2d221 Mon Sep 17 00:00:00 2001 From: Daniel Duan Date: Fri, 6 Feb 2026 14:05:02 -0800 Subject: [PATCH 08/14] Skip key materialization in bare array selectors Keep the single-segment selector fast path and add a byte-token lookup path for selectors when no key transform is active. ParseSelect now hashes bare tokens directly, probes array keys without building a String, and only materializes the key when creating the array entry. Dotted selectors and transformed/quoted keys keep existing logic. 
--- Sources/TOMLDecoder/Parsing/Parser.swift | 254 ++++++++++++++++++----- 1 file changed, 205 insertions(+), 49 deletions(-) diff --git a/Sources/TOMLDecoder/Parsing/Parser.swift b/Sources/TOMLDecoder/Parsing/Parser.swift index 66fc5cc..7789afa 100644 --- a/Sources/TOMLDecoder/Parsing/Parser.swift +++ b/Sources/TOMLDecoder/Parsing/Parser.swift @@ -933,75 +933,143 @@ struct Parser: ~Copyable { } let firstToken = token - let (firstKey, firstKeyHash) = try normalizeKeyAndHash( - bytes: bytes, - token: token, - keyTransform: keyTransform - ) + let firstIsBareNoTransform = keyTransform == nil && firstToken.kind == .bareKey + let firstKey: String? + let firstKeyHash: Int + if firstIsBareNoTransform { + firstKey = nil + firstKeyHash = fastKeyHash(bytes: bytes, range: firstToken.text) + } else { + let normalized = try normalizeKeyAndHash( + bytes: bytes, + token: firstToken, + keyTransform: keyTransform + ) + firstKey = normalized.key + firstKeyHash = normalized.keyHash + } try nextToken(bytes: bytes, isDotSpecial: true) - let lastKey: String - let lastKeyHash: Int - let lastKeyToken: Token if token.kind == .rbracket { - lastKey = firstKey - lastKeyHash = firstKeyHash - lastKeyToken = firstToken currentTable = 0 currentTableIsKeyed = false + if !llb { + let key = firstKey ?? 
makeString(bytes: bytes, range: firstToken.text) + currentTable = try createKeyTable( + normalizedKey: key, + keyHash: firstKeyHash, + lineNumber: firstToken.lineNumber, + inTable: currentTable, + isKeyed: currentTableIsKeyed + ) + currentTableIsKeyed = true + } else { + let arrayIndex: Int + if firstIsBareNoTransform { + if let existingArrayIndex = lookupArray( + in: currentTable, + keyed: currentTableIsKeyed, + bytes: bytes, + token: firstToken, + keyHash: firstKeyHash + ) { + arrayIndex = existingArrayIndex + } else { + let key = makeString(bytes: bytes, range: firstToken.text) + arrayIndex = try createKeyArray( + normalizedKey: key, + keyHash: firstKeyHash, + lineNumber: firstToken.lineNumber, + inTable: currentTable, + isKeyed: currentTableIsKeyed, + kind: .table + ) + } + } else { + let key = firstKey! + var maybeArrayIndex = lookupArray( + in: currentTable, + keyed: currentTableIsKeyed, + key: key, + keyHash: firstKeyHash + ) + if maybeArrayIndex == nil { + maybeArrayIndex = try createKeyArray( + normalizedKey: key, + keyHash: firstKeyHash, + lineNumber: firstToken.lineNumber, + inTable: currentTable, + isKeyed: currentTableIsKeyed, + kind: .table + ) + } + arrayIndex = maybeArrayIndex! + } + if keyArrays[arrayIndex].array.kind != .table { + throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "array mismatch")) + } + + let newTableIndex = tables.count + tables.append(InternalTOMLTable()) + if keyArrays[arrayIndex].array.elements.isEmpty { + keyArrays[arrayIndex].array.elements.reserveCapacity(8) + } + keyArrays[arrayIndex].array.elements.append(.table(lineNumber: token.lineNumber, newTableIndex)) + currentTable = newTableIndex + currentTableIsKeyed = false + } } else { if token.kind != .dot { throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "invalid key")) } tablePath.removeAll(keepingCapacity: true) - tablePath.append((key: firstKey, keyHash: firstKeyHash)) + tablePath.append((key: firstKey ?? 
makeString(bytes: bytes, range: firstToken.text), keyHash: firstKeyHash)) try nextToken(bytes: bytes, isDotSpecial: true) let (key, keyHash, keyToken) = try fillTablePath(bytes: bytes, clearPath: false) - lastKey = key - lastKeyHash = keyHash - lastKeyToken = keyToken try walkTablePath() - } - if !llb { - // [x.y.z] -> create z = {} in x.y - currentTable = try createKeyTable( - normalizedKey: lastKey, - keyHash: lastKeyHash, - lineNumber: lastKeyToken.lineNumber, - inTable: currentTable, - isKeyed: currentTableIsKeyed - ) - currentTableIsKeyed = true - } else { - // [[x.y.z]] -> create z = [] in x.y - var maybeArrayIndex = lookupArray(in: currentTable, keyed: currentTableIsKeyed, key: lastKey, keyHash: lastKeyHash) - if maybeArrayIndex == nil { - maybeArrayIndex = try createKeyArray( - normalizedKey: lastKey, - keyHash: lastKeyHash, - lineNumber: lastKeyToken.lineNumber, + if !llb { + currentTable = try createKeyTable( + normalizedKey: key, + keyHash: keyHash, + lineNumber: keyToken.lineNumber, inTable: currentTable, - isKeyed: currentTableIsKeyed, - kind: .table + isKeyed: currentTableIsKeyed ) - } - let arrayIndex = maybeArrayIndex! - if keyArrays[arrayIndex].array.kind != .table { - throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "array mismatch")) - } + currentTableIsKeyed = true + } else { + var maybeArrayIndex = lookupArray( + in: currentTable, + keyed: currentTableIsKeyed, + key: key, + keyHash: keyHash + ) + if maybeArrayIndex == nil { + maybeArrayIndex = try createKeyArray( + normalizedKey: key, + keyHash: keyHash, + lineNumber: keyToken.lineNumber, + inTable: currentTable, + isKeyed: currentTableIsKeyed, + kind: .table + ) + } + let arrayIndex = maybeArrayIndex! 
+ if keyArrays[arrayIndex].array.kind != .table { + throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "array mismatch")) + } - // add to z[] - let newTableIndex = tables.count - tables.append(InternalTOMLTable()) - if keyArrays[arrayIndex].array.elements.isEmpty { - keyArrays[arrayIndex].array.elements.reserveCapacity(8) + let newTableIndex = tables.count + tables.append(InternalTOMLTable()) + if keyArrays[arrayIndex].array.elements.isEmpty { + keyArrays[arrayIndex].array.elements.reserveCapacity(8) + } + keyArrays[arrayIndex].array.elements.append(.table(lineNumber: token.lineNumber, newTableIndex)) + currentTable = newTableIndex + currentTableIsKeyed = false } - keyArrays[arrayIndex].array.elements.append(.table(lineNumber: token.lineNumber, newTableIndex)) - currentTable = newTableIndex - currentTableIsKeyed = false } if token.kind != .rbracket { @@ -2003,6 +2071,70 @@ extension Parser { } } + @inline(__always) + func keyMatchesToken(bytes: UnsafeBufferPointer, token: Token, key: borrowing String) -> Bool { + let range = token.text + let count = range.upperBound - range.lowerBound + + if let matches = key.utf8.withContiguousStorageIfAvailable({ keyBuffer -> Bool in + if keyBuffer.count != count { + return false + } + + var keyIndex = 0 + var byteIndex = range.lowerBound + while keyIndex < count { + if keyBuffer[keyIndex] != bytes[byteIndex] { + return false + } + keyIndex += 1 + byteIndex += 1 + } + return true + }) { + return matches + } + + var index = range.lowerBound + for byte in key.utf8 { + if index >= range.upperBound || bytes[index] != byte { + return false + } + index += 1 + } + return index == range.upperBound + } + + @inline(__always) + func matchKeyArray( + in indices: borrowing [Int], + bytes: UnsafeBufferPointer, + token: Token, + keyHash: Int + ) -> Int? { + if indices.isEmpty { + return nil + } + return keyArrays.withUnsafeBufferPointer { keyArrayBuffer -> Int? 
in + guard let keyArrayBase = keyArrayBuffer.baseAddress else { + return nil + } + let indexCount = indices.count + var i = 0 + while i < indexCount { + let keyArrayIndex = indices[i] + let keyArrayPair = keyArrayBase.advanced(by: keyArrayIndex).pointee + if keyArrayPair.keyHash == keyHash, + keyMatchesToken(bytes: bytes, token: token, key: keyArrayPair.key) + { + return keyArrayIndex + } + i += 1 + } + return nil + } + } + @inline(__always) func matchKeyTable(in indices: borrowing [Int], key: borrowing String, keyHash: Int) -> Int? { if indices.isEmpty { @@ -2079,6 +2211,30 @@ extension Parser { return matchKeyArray(in: tables[tableIndex].arrays, key: key, keyHash: keyHash) } + @inline(__always) + func lookupArray( + in tableIndex: Int, + keyed: Bool, + bytes: UnsafeBufferPointer, + token: Token, + keyHash: Int + ) -> Int? { + if keyed { + return matchKeyArray( + in: keyTables[tableIndex].table.arrays, + bytes: bytes, + token: token, + keyHash: keyHash + ) + } + return matchKeyArray( + in: tables[tableIndex].arrays, + bytes: bytes, + token: token, + keyHash: keyHash + ) + } + mutating func walkTablePath() throws(TOMLError) { var tableIndex = 0 var isKeyed = false From 6aad1387da22f5f2fc6ad67797cc64382d8b4188 Mon Sep 17 00:00:00 2001 From: Daniel Duan Date: Fri, 6 Feb 2026 14:23:29 -0800 Subject: [PATCH 09/14] Tighten scanString hot path Load character-class tables only in branches that use them and split dot-special bare-key scanning into a common bare-key fast path plus plus-sign fallback. This removes per-token setup and branch work in tokenizer scan loops. 
--- Sources/TOMLDecoder/Parsing/Parser.swift | 26 ++++++++++++++---------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/Sources/TOMLDecoder/Parsing/Parser.swift b/Sources/TOMLDecoder/Parsing/Parser.swift index 7789afa..1d2abd8 100644 --- a/Sources/TOMLDecoder/Parsing/Parser.swift +++ b/Sources/TOMLDecoder/Parsing/Parser.swift @@ -146,9 +146,6 @@ struct Parser: ~Copyable { } func scanString(range: Range, lineNumber: Int) throws(TOMLError) { - let isBareKeyChar = CodeUnits.isBareKeyChar - let isValueChar = CodeUnits.isValueChar - let isBasicStringBodyChar = CodeUnits.isBasicStringBodyChar let start = range.lowerBound let end = range.upperBound let head = bytes[start] @@ -156,6 +153,7 @@ struct Parser: ~Copyable { (head >= CodeUnits.upperA && head <= CodeUnits.upperZ) || head == CodeUnits.underscore { + let isBareKeyChar = CodeUnits.isBareKeyChar var index = start + 1 while index < end { let ch = bytes[index] @@ -261,6 +259,7 @@ struct Parser: ~Copyable { } var i = start + 1 + let isBasicStringBodyChar = CodeUnits.isBasicStringBodyChar // 8x unrolling for double-quoted strings while i + 8 <= end { @@ -359,23 +358,28 @@ struct Parser: ~Copyable { } if isDotSpecial { + let isBareKeyChar = CodeUnits.isBareKeyChar var index = start - var isValidKey = true + while index < end, isBareKeyChar[Int(bytes[index])] { + index += 1 + } + if index >= end || bytes[index] != CodeUnits.plus { + emitToken(kind: .bareKey, start: start, end: index) + return + } + + index += 1 while index < end { let ch = bytes[index] - if isBareKeyChar[Int(ch)] { - index += 1 - continue - } - if ch == CodeUnits.plus { - isValidKey = false + if isBareKeyChar[Int(ch)] || ch == CodeUnits.plus { index += 1 continue } break } - emitToken(kind: isValidKey ? 
.bareKey : .string, start: start, end: index) + emitToken(kind: .string, start: start, end: index) } else { + let isValueChar = CodeUnits.isValueChar var index = start while index < end { let ch = bytes[index] From e46fc536aa739a391d57932c2db7e4c6e45de234 Mon Sep 17 00:00:00 2001 From: Daniel Duan Date: Fri, 6 Feb 2026 14:31:18 -0800 Subject: [PATCH 10/14] Speed up double-quoted token scan Replace the double-quoted string 8-byte classifier loop with unaligned UInt64 chunk probes that detect quote, backslash, and LF bytes with bit masks. Keep the existing bytewise fallback loop and parsing behavior unchanged, so only the hot scanning path changes. --- Sources/TOMLDecoder/Parsing/Parser.swift | 50 ++++++++++++++++++------ 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/Sources/TOMLDecoder/Parsing/Parser.swift b/Sources/TOMLDecoder/Parsing/Parser.swift index 1d2abd8..276bce4 100644 --- a/Sources/TOMLDecoder/Parsing/Parser.swift +++ b/Sources/TOMLDecoder/Parsing/Parser.swift @@ -260,18 +260,28 @@ struct Parser: ~Copyable { var i = start + 1 let isBasicStringBodyChar = CodeUnits.isBasicStringBodyChar - - // 8x unrolling for double-quoted strings - while i + 8 <= end { - if !isBasicStringBodyChar[Int(bytes[i])] { break } - if !isBasicStringBodyChar[Int(bytes[i + 1])] { break } - if !isBasicStringBodyChar[Int(bytes[i + 2])] { break } - if !isBasicStringBodyChar[Int(bytes[i + 3])] { break } - if !isBasicStringBodyChar[Int(bytes[i + 4])] { break } - if !isBasicStringBodyChar[Int(bytes[i + 5])] { break } - if !isBasicStringBodyChar[Int(bytes[i + 6])] { break } - if !isBasicStringBodyChar[Int(bytes[i + 7])] { break } - i += 8 + if let baseAddress = bytes.baseAddress { + while i + 8 <= end { + let chunk = UnsafeRawPointer(baseAddress.advanced(by: i)).loadUnaligned( + as: UInt64.self + ) + if chunkContainsDoubleQuotedStopByte(chunk) { + break + } + i += 8 + } + } else { + while i + 8 <= end { + if !isBasicStringBodyChar[Int(bytes[i])] { break } + if !isBasicStringBodyChar[Int(bytes[i + 1])] { break } + if 
!isBasicStringBodyChar[Int(bytes[i + 1])] { break } + if !isBasicStringBodyChar[Int(bytes[i + 2])] { break } + if !isBasicStringBodyChar[Int(bytes[i + 3])] { break } + if !isBasicStringBodyChar[Int(bytes[i + 4])] { break } + if !isBasicStringBodyChar[Int(bytes[i + 5])] { break } + if !isBasicStringBodyChar[Int(bytes[i + 6])] { break } + if !isBasicStringBodyChar[Int(bytes[i + 7])] { break } + i += 8 + } } while i < end { @@ -1870,6 +1880,22 @@ func scanTimezoneOffset(bytes: UnsafeBufferPointer, range: Range) -> return index } +@inline(__always) +private func chunkContainsDoubleQuotedStopByte(_ chunk: UInt64) -> Bool { + let ones: UInt64 = 0x0101_0101_0101_0101 + let highBits: UInt64 = 0x8080_8080_8080_8080 + + @inline(__always) + func hasByte(_ bytePattern: UInt64) -> Bool { + let xor = chunk ^ bytePattern + return ((xor &- ones) & ~xor & highBits) != 0 + } + + return hasByte(0x2222_2222_2222_2222) + || hasByte(0x5C5C_5C5C_5C5C_5C5C) + || hasByte(0x0A0A_0A0A_0A0A_0A0A) +} + func normalizeKeyAndHash(bytes: UnsafeBufferPointer, token: Token, keyTransform: (@Sendable (String) -> String)?) throws(TOMLError) -> (key: String, keyHash: Int) { var start = token.text.lowerBound var end = token.text.upperBound From 3064c0df9834235dd5486c54cfa783d6c38f9691 Mon Sep 17 00:00:00 2001 From: Daniel Duan Date: Fri, 6 Feb 2026 14:39:41 -0800 Subject: [PATCH 11/14] Add table key-hash bloom filter Record two hash bits per inserted key on each internal table and gate table/array/value lookups with fast negative checks before linear scans. This keeps behavior unchanged while cutting repeated miss scans during parse-time key insertion and table path traversal. 
--- Sources/TOMLDecoder/Parsing/Parser.swift | 34 +++++++++++++++++++ .../TOMLDecoder/Parsing/TOMLDocument.swift | 20 +++++++++++ 2 files changed, 54 insertions(+) diff --git a/Sources/TOMLDecoder/Parsing/Parser.swift b/Sources/TOMLDecoder/Parsing/Parser.swift index 276bce4..13bcac0 100644 --- a/Sources/TOMLDecoder/Parsing/Parser.swift +++ b/Sources/TOMLDecoder/Parsing/Parser.swift @@ -438,11 +438,13 @@ struct Parser: ~Copyable { keyTables[tableIndex].table.keyValues.reserveCapacity(8) } keyTables[tableIndex].table.keyValues.append(index) + keyTables[tableIndex].table.recordKeyHash(keyHash) } else { if tables[tableIndex].keyValues.isEmpty { tables[tableIndex].keyValues.reserveCapacity(8) } tables[tableIndex].keyValues.append(index) + tables[tableIndex].recordKeyHash(keyHash) } } @@ -490,11 +492,13 @@ struct Parser: ~Copyable { keyTables[tableIndex].table.tables.reserveCapacity(8) } keyTables[tableIndex].table.tables.append(index) + keyTables[tableIndex].table.recordKeyHash(keyHash) } else { if tables[tableIndex].tables.isEmpty { tables[tableIndex].tables.reserveCapacity(8) } tables[tableIndex].tables.append(index) + tables[tableIndex].recordKeyHash(keyHash) } return index } @@ -523,11 +527,13 @@ struct Parser: ~Copyable { keyTables[tableIndex].table.arrays.reserveCapacity(8) } keyTables[tableIndex].table.arrays.append(index) + keyTables[tableIndex].table.recordKeyHash(keyHash) } else { if tables[tableIndex].arrays.isEmpty { tables[tableIndex].arrays.reserveCapacity(8) } tables[tableIndex].arrays.append(index) + tables[tableIndex].recordKeyHash(keyHash) } return index } @@ -2195,6 +2201,14 @@ extension Parser { key: borrowing String, keyHash: Int ) -> InternalTOMLTable.Value? 
{ + if keyed { + if !keyTables[tableIndex].table.mightContainKeyHash(keyHash) { + return nil + } + } else if !tables[tableIndex].mightContainKeyHash(keyHash) { + return nil + } + if keyed { if let keyValueIndex = matchKeyValue(in: keyTables[tableIndex].table.keyValues, key: key, keyHash: keyHash) { return .keyValue(keyValueIndex) @@ -2228,16 +2242,28 @@ extension Parser { @inline(__always) func lookupTable(in tableIndex: Int, keyed: Bool, key: borrowing String, keyHash: Int) -> Int? { if keyed { + if !keyTables[tableIndex].table.mightContainKeyHash(keyHash) { + return nil + } return matchKeyTable(in: keyTables[tableIndex].table.tables, key: key, keyHash: keyHash) } + if !tables[tableIndex].mightContainKeyHash(keyHash) { + return nil + } return matchKeyTable(in: tables[tableIndex].tables, key: key, keyHash: keyHash) } @inline(__always) func lookupArray(in tableIndex: Int, keyed: Bool, key: borrowing String, keyHash: Int) -> Int? { if keyed { + if !keyTables[tableIndex].table.mightContainKeyHash(keyHash) { + return nil + } return matchKeyArray(in: keyTables[tableIndex].table.arrays, key: key, keyHash: keyHash) } + if !tables[tableIndex].mightContainKeyHash(keyHash) { + return nil + } return matchKeyArray(in: tables[tableIndex].arrays, key: key, keyHash: keyHash) } @@ -2250,6 +2276,9 @@ extension Parser { keyHash: Int ) -> Int? 
{ if keyed { + if !keyTables[tableIndex].table.mightContainKeyHash(keyHash) { + return nil + } return matchKeyArray( in: keyTables[tableIndex].table.arrays, bytes: bytes, @@ -2257,6 +2286,9 @@ extension Parser { keyHash: keyHash ) } + if !tables[tableIndex].mightContainKeyHash(keyHash) { + return nil + } return matchKeyArray( in: tables[tableIndex].arrays, bytes: bytes, @@ -2303,11 +2335,13 @@ extension Parser { keyTables[tableIndex].table.tables.reserveCapacity(8) } keyTables[tableIndex].table.tables.append(newTableAddress) + keyTables[tableIndex].table.recordKeyHash(keyHash) } else { if tables[tableIndex].tables.isEmpty { tables[tableIndex].tables.reserveCapacity(8) } tables[tableIndex].tables.append(newTableAddress) + tables[tableIndex].recordKeyHash(keyHash) } tableIndex = newTableAddress isKeyed = true diff --git a/Sources/TOMLDecoder/Parsing/TOMLDocument.swift b/Sources/TOMLDecoder/Parsing/TOMLDocument.swift index ce77a20..189b94c 100644 --- a/Sources/TOMLDecoder/Parsing/TOMLDocument.swift +++ b/Sources/TOMLDecoder/Parsing/TOMLDocument.swift @@ -120,10 +120,22 @@ struct InternalTOMLTable: Equatable, Sendable { var implicit: Bool = false var readOnly: Bool = false var definedByDottedKey: Bool = false + var keyHashBloom: UInt64 = 0 var keyValues: [Int] = [] var arrays: [Int] = [] var tables: [Int] = [] + @inline(__always) + mutating func recordKeyHash(_ keyHash: Int) { + keyHashBloom |= keyHashBloomMask(keyHash) + } + + @inline(__always) + func mightContainKeyHash(_ keyHash: Int) -> Bool { + let mask = keyHashBloomMask(keyHash) + return (keyHashBloom & mask) == mask + } + func allKeys(_ document: TOMLDocument) -> [String] { var keys = [String]() for kv in keyValues { @@ -168,6 +180,14 @@ struct InternalTOMLTable: Equatable, Sendable { } } +@inline(__always) +private func keyHashBloomMask(_ keyHash: Int) -> UInt64 { + let raw = UInt(bitPattern: keyHash) + let bit0 = UInt64(1) << UInt64(raw & 63) + let bit1 = UInt64(1) << UInt64((raw >> 6) & 63) + return bit0 | 
bit1 +} + struct DateTimeComponents: Equatable { let date: LocalDate? let time: LocalTime? From 2200882fd075076adeab1205aa15f98accec66e0 Mon Sep 17 00:00:00 2001 From: Daniel Duan Date: Fri, 6 Feb 2026 14:48:52 -0800 Subject: [PATCH 12/14] Trim impossible pointer-nil branches Drop optional base-address branches from hot tokenizer/hash paths where parse-time buffers are guaranteed to have storage for non-empty ranges. - remove the fallback byte-by-byte 8-byte pre-scan branch in double-quoted token scanning - short-circuit zero-length hash range and use direct base-address hashing path for non-empty ranges - fast-return empty-range string creation and use direct base-address decoding for non-empty ranges --- Sources/TOMLDecoder/Parsing/Parser.swift | 57 +++++++----------------- 1 file changed, 17 insertions(+), 40 deletions(-) diff --git a/Sources/TOMLDecoder/Parsing/Parser.swift b/Sources/TOMLDecoder/Parsing/Parser.swift index 13bcac0..85cb699 100644 --- a/Sources/TOMLDecoder/Parsing/Parser.swift +++ b/Sources/TOMLDecoder/Parsing/Parser.swift @@ -260,28 +260,15 @@ struct Parser: ~Copyable { var i = start + 1 let isBasicStringBodyChar = CodeUnits.isBasicStringBodyChar - if let baseAddress = bytes.baseAddress { - while i + 8 <= end { - let chunk = UnsafeRawPointer(baseAddress.advanced(by: i)).loadUnaligned( - as: UInt64.self - ) - if chunkContainsDoubleQuotedStopByte(chunk) { - break - } - i += 8 - } - } else { - while i + 8 <= end { - if !isBasicStringBodyChar[Int(bytes[i])] { break } - if !isBasicStringBodyChar[Int(bytes[i + 1])] { break } - if !isBasicStringBodyChar[Int(bytes[i + 2])] { break } - if !isBasicStringBodyChar[Int(bytes[i + 3])] { break } - if !isBasicStringBodyChar[Int(bytes[i + 4])] { break } - if !isBasicStringBodyChar[Int(bytes[i + 5])] { break } - if !isBasicStringBodyChar[Int(bytes[i + 6])] { break } - if !isBasicStringBodyChar[Int(bytes[i + 7])] { break } - i += 8 + let baseAddress = bytes.baseAddress! 
+ while i + 8 <= end { + let chunk = UnsafeRawPointer(baseAddress.advanced(by: i)).loadUnaligned( + as: UInt64.self + ) + if chunkContainsDoubleQuotedStopByte(chunk) { + break } + i += 8 } while i < end { @@ -1959,24 +1946,14 @@ func normalizeKeyAndHash(bytes: UnsafeBufferPointer, token: Token, keyTra @inline(__always) func fastKeyHash(bytes: UnsafeBufferPointer, range: Range) -> Int { let count = range.upperBound - range.lowerBound - if let base = bytes.baseAddress { - let start = base.advanced(by: range.lowerBound) - if count <= 8 { - return Int(truncatingIfNeeded: packedKeyHash(UnsafeBufferPointer(start: start, count: count))) - } - return Int(truncatingIfNeeded: sampledKeyHash(start: start, count: count)) + if count == 0 { + return 0 } - - let offsetBasis: UInt64 = 14_695_981_039_346_656_037 - let prime: UInt64 = 1_099_511_628_211 - var hash = offsetBasis - var index = range.lowerBound - while index < range.upperBound { - hash ^= UInt64(bytes[index]) - hash &*= prime - index += 1 + let start = bytes.baseAddress!.advanced(by: range.lowerBound) + if count <= 8 { + return Int(truncatingIfNeeded: packedKeyHash(UnsafeBufferPointer(start: start, count: count))) } - return Int(truncatingIfNeeded: hash) + return Int(truncatingIfNeeded: sampledKeyHash(start: start, count: count)) } @inline(__always) @@ -2052,11 +2029,11 @@ private func packedKeyHash(_ buffer: UnsafeBufferPointer) -> UInt64 { @inline(__always) private func makeString(bytes: UnsafeBufferPointer, range: Range) -> String { - guard let baseAddress = bytes.baseAddress else { + let count = range.upperBound - range.lowerBound + if count == 0 { return "" } - let start = baseAddress.advanced(by: range.lowerBound) - let count = range.upperBound - range.lowerBound + let start = bytes.baseAddress!.advanced(by: range.lowerBound) return String(decoding: UnsafeBufferPointer(start: start, count: count), as: UTF8.self) } From d60ac45510ef46aee3c558f6c56b2620646de694 Mon Sep 17 00:00:00 2001 From: Daniel Duan Date: Fri, 6 
Feb 2026 15:12:39 -0800 Subject: [PATCH 13/14] Hoist first selector path segment Advance the first selector segment directly before tablePath fill/walk. - Reuse the same path-segment advancement logic for first-segment setup and subsequent walk. - Start walkTablePath from the already-resolved table/keyed state. - Keep existing two-phase selector shape while dropping one tablePath append for the first segment. Goal: reduce parseSelect work on table/array selector paths without one-pass ARC regressions. --- Sources/TOMLDecoder/Parsing/Parser.swift | 166 +++++++++++++---------- 1 file changed, 92 insertions(+), 74 deletions(-) diff --git a/Sources/TOMLDecoder/Parsing/Parser.swift b/Sources/TOMLDecoder/Parsing/Parser.swift index 85cb699..8393d7b 100644 --- a/Sources/TOMLDecoder/Parsing/Parser.swift +++ b/Sources/TOMLDecoder/Parsing/Parser.swift @@ -508,7 +508,11 @@ struct Parser: ~Copyable { } let index = keyArrays.count - keyArrays.append(KeyArrayPair(key: key, keyHash: keyHash, array: InternalTOMLArray(kind: kind))) + var array = InternalTOMLArray(kind: kind) + if kind == .table { + array.elements.reserveCapacity(8) + } + keyArrays.append(KeyArrayPair(key: key, keyHash: keyHash, array: array)) if isKeyed { if keyTables[tableIndex].table.arrays.isEmpty { keyTables[tableIndex].table.arrays.reserveCapacity(8) @@ -923,15 +927,12 @@ struct Parser: ~Copyable { mutating func parseSelect(bytes: UnsafeBufferPointer) throws(TOMLError) { assert(token.kind == .lbracket) - let index = token.text.lowerBound - let nextIndex = index + 1 - let llb = index < bytes.count - && bytes[index] == CodeUnits.lbracket - && nextIndex < bytes.count - && bytes[nextIndex] == CodeUnits.lbracket - - try eatToken(bytes: bytes, kind: .lbracket, isDotSpecial: true) + let nextIndex = token.text.lowerBound + 1 + let llb = nextIndex < bytes.count && bytes[nextIndex] == CodeUnits.lbracket if llb { + cursor = nextIndex + 1 + try nextToken(bytes: bytes, isDotSpecial: true) + } else { try eatToken(bytes: 
bytes, kind: .lbracket, isDotSpecial: true) } @@ -994,14 +995,15 @@ struct Parser: ~Copyable { } } else { let key = firstKey! - var maybeArrayIndex = lookupArray( + if let existingArrayIndex = lookupArray( in: currentTable, keyed: currentTableIsKeyed, key: key, keyHash: firstKeyHash - ) - if maybeArrayIndex == nil { - maybeArrayIndex = try createKeyArray( + ) { + arrayIndex = existingArrayIndex + } else { + arrayIndex = try createKeyArray( normalizedKey: key, keyHash: firstKeyHash, lineNumber: firstToken.lineNumber, @@ -1010,7 +1012,6 @@ struct Parser: ~Copyable { kind: .table ) } - arrayIndex = maybeArrayIndex! } if keyArrays[arrayIndex].array.kind != .table { throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "array mismatch")) @@ -1018,9 +1019,6 @@ struct Parser: ~Copyable { let newTableIndex = tables.count tables.append(InternalTOMLTable()) - if keyArrays[arrayIndex].array.elements.isEmpty { - keyArrays[arrayIndex].array.elements.reserveCapacity(8) - } keyArrays[arrayIndex].array.elements.append(.table(lineNumber: token.lineNumber, newTableIndex)) currentTable = newTableIndex currentTableIsKeyed = false @@ -1030,12 +1028,21 @@ struct Parser: ~Copyable { throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "invalid key")) } + let firstPathKey = firstKey ?? makeString(bytes: bytes, range: firstToken.text) + var pathTableIndex = 0 + var pathTableIsKeyed = false + try advanceTablePathSegment( + tableIndex: &pathTableIndex, + isKeyed: &pathTableIsKeyed, + key: firstPathKey, + keyHash: firstKeyHash + ) + tablePath.removeAll(keepingCapacity: true) - tablePath.append((key: firstKey ?? 
makeString(bytes: bytes, range: firstToken.text), keyHash: firstKeyHash)) try nextToken(bytes: bytes, isDotSpecial: true) let (key, keyHash, keyToken) = try fillTablePath(bytes: bytes, clearPath: false) - try walkTablePath() + try walkTablePath(startTable: pathTableIndex, startKeyed: pathTableIsKeyed) if !llb { currentTable = try createKeyTable( @@ -1047,14 +1054,15 @@ struct Parser: ~Copyable { ) currentTableIsKeyed = true } else { - var maybeArrayIndex = lookupArray( + let arrayIndex: Int = if let existingArrayIndex = lookupArray( in: currentTable, keyed: currentTableIsKeyed, key: key, keyHash: keyHash - ) - if maybeArrayIndex == nil { - maybeArrayIndex = try createKeyArray( + ) { + existingArrayIndex + } else { + try createKeyArray( normalizedKey: key, keyHash: keyHash, lineNumber: keyToken.lineNumber, @@ -1063,16 +1071,12 @@ struct Parser: ~Copyable { kind: .table ) } - let arrayIndex = maybeArrayIndex! if keyArrays[arrayIndex].array.kind != .table { throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "array mismatch")) } let newTableIndex = tables.count tables.append(InternalTOMLTable()) - if keyArrays[arrayIndex].array.elements.isEmpty { - keyArrays[arrayIndex].array.elements.reserveCapacity(8) - } keyArrays[arrayIndex].array.elements.append(.table(lineNumber: token.lineNumber, newTableIndex)) currentTable = newTableIndex currentTableIsKeyed = false @@ -1084,13 +1088,14 @@ struct Parser: ~Copyable { } if llb { - let nextIndex = token.text.index(after: token.text.startIndex) - guard nextIndex < bytes.count, bytes[nextIndex] == CodeUnits.rbracket else { + guard cursor < bytes.count, bytes[cursor] == CodeUnits.rbracket else { throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "expects ]]")) } + cursor += 1 + try nextToken(bytes: bytes, isDotSpecial: true) + } else { try eatToken(bytes: bytes, kind: .rbracket, isDotSpecial: true) } - try eatToken(bytes: bytes, kind: .rbracket, isDotSpecial: true) if token.kind != .newline, token.kind != 
.eof { throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "extra chars after ] or ]]")) @@ -2274,57 +2279,70 @@ extension Parser { ) } - mutating func walkTablePath() throws(TOMLError) { - var tableIndex = 0 - var isKeyed = false - for (key, keyHash) in tablePath { - switch tableValue(tableIndex: tableIndex, keyed: isKeyed, key: key, keyHash: keyHash) { - case let .table(index): - tableIndex = index - isKeyed = true - case let .array(arrayIndex): - let array = keyArrays[arrayIndex].array - guard case .table = array.kind else { - throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "array element is not a table")) - } + mutating func advanceTablePathSegment( + tableIndex: inout Int, + isKeyed: inout Bool, + key: String, + keyHash: Int + ) throws(TOMLError) { + switch tableValue(tableIndex: tableIndex, keyed: isKeyed, key: key, keyHash: keyHash) { + case let .table(index): + tableIndex = index + isKeyed = true + case let .array(arrayIndex): + let array = keyArrays[arrayIndex].array + guard case .table = array.kind else { + throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "array element is not a table")) + } - if array.elements.isEmpty { - throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "empty array")) - } + if array.elements.isEmpty { + throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "empty array")) + } - guard case let .table(_, index) = array.elements.last else { - throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "array element is not a table")) - } + guard case let .table(_, index) = array.elements.last else { + throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "array element is not a table")) + } - tableIndex = index - isKeyed = false - case .keyValue: - throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "key-value already exists")) - default: - let newTableAddress = keyTables.count - var newTable = InternalTOMLTable() - newTable.implicit = true - 
newTable.definedByDottedKey = false - keyTables.append(KeyTablePair(key: key, keyHash: keyHash, table: newTable)) - - if isKeyed { - if keyTables[tableIndex].table.tables.isEmpty { - keyTables[tableIndex].table.tables.reserveCapacity(8) - } - keyTables[tableIndex].table.tables.append(newTableAddress) - keyTables[tableIndex].table.recordKeyHash(keyHash) - } else { - if tables[tableIndex].tables.isEmpty { - tables[tableIndex].tables.reserveCapacity(8) - } - tables[tableIndex].tables.append(newTableAddress) - tables[tableIndex].recordKeyHash(keyHash) + tableIndex = index + isKeyed = false + case .keyValue: + throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "key-value already exists")) + default: + let newTableAddress = keyTables.count + var newTable = InternalTOMLTable() + newTable.implicit = true + newTable.definedByDottedKey = false + keyTables.append(KeyTablePair(key: key, keyHash: keyHash, table: newTable)) + + if isKeyed { + if keyTables[tableIndex].table.tables.isEmpty { + keyTables[tableIndex].table.tables.reserveCapacity(8) + } + keyTables[tableIndex].table.tables.append(newTableAddress) + keyTables[tableIndex].table.recordKeyHash(keyHash) + } else { + if tables[tableIndex].tables.isEmpty { + tables[tableIndex].tables.reserveCapacity(8) } - tableIndex = newTableAddress - isKeyed = true + tables[tableIndex].tables.append(newTableAddress) + tables[tableIndex].recordKeyHash(keyHash) } + tableIndex = newTableAddress + isKeyed = true } + } + mutating func walkTablePath(startTable: Int = 0, startKeyed: Bool = false) throws(TOMLError) { + var tableIndex = startTable + var isKeyed = startKeyed + for (key, keyHash) in tablePath { + try advanceTablePathSegment( + tableIndex: &tableIndex, + isKeyed: &isKeyed, + key: key, + keyHash: keyHash + ) + } currentTable = tableIndex currentTableIsKeyed = isKeyed } From 245d0c3b077ba83592112d6478674c9d5711ef9f Mon Sep 17 00:00:00 2001 From: Daniel Duan Date: Fri, 6 Feb 2026 15:25:18 -0800 Subject: [PATCH 14/14] 
Fast-path two-segment selectors Teach parseSelect to special-case dotted selectors with exactly two segments. After walking the first segment, parse the second segment directly; if the selector closes, skip fillTablePath and walkTablePath. Fallback for longer dotted selectors keeps the existing tablePath+walk behavior unchanged. --- Sources/TOMLDecoder/Parsing/Parser.swift | 38 ++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/Sources/TOMLDecoder/Parsing/Parser.swift b/Sources/TOMLDecoder/Parsing/Parser.swift index 8393d7b..dab1648 100644 --- a/Sources/TOMLDecoder/Parsing/Parser.swift +++ b/Sources/TOMLDecoder/Parsing/Parser.swift @@ -1038,11 +1038,43 @@ struct Parser: ~Copyable { keyHash: firstKeyHash ) - tablePath.removeAll(keepingCapacity: true) + try nextToken(bytes: bytes, isDotSpecial: true) + if token.kind != .string, token.kind != .bareKey { + throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "invalid or missing key")) + } + + let secondToken = token + let (secondKey, secondKeyHash) = try normalizeKeyAndHash( + bytes: bytes, + token: secondToken, + keyTransform: keyTransform + ) try nextToken(bytes: bytes, isDotSpecial: true) - let (key, keyHash, keyToken) = try fillTablePath(bytes: bytes, clearPath: false) - try walkTablePath(startTable: pathTableIndex, startKeyed: pathTableIsKeyed) + let key: String + let keyHash: Int + let keyToken: Token + + if token.kind == .rbracket { + currentTable = pathTableIndex + currentTableIsKeyed = pathTableIsKeyed + key = secondKey + keyHash = secondKeyHash + keyToken = secondToken + } else { + tablePath.removeAll(keepingCapacity: true) + tablePath.append((key: secondKey, keyHash: secondKeyHash)) + if token.kind != .dot { + throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "invalid key")) + } + try nextToken(bytes: bytes, isDotSpecial: true) + + let terminal = try fillTablePath(bytes: bytes, clearPath: false) + key = terminal.key + keyHash = terminal.keyHash + 
keyToken = terminal.token + try walkTablePath(startTable: pathTableIndex, startKeyed: pathTableIsKeyed) + } if !llb { currentTable = try createKeyTable(