diff --git a/Sources/TOMLDecoder/Parsing/Constants.swift b/Sources/TOMLDecoder/Parsing/Constants.swift index f5b2261..ee789e5 100644 --- a/Sources/TOMLDecoder/Parsing/Constants.swift +++ b/Sources/TOMLDecoder/Parsing/Constants.swift @@ -84,6 +84,15 @@ enum CodeUnits { return UnsafePointer(ptr) }() + nonisolated(unsafe) static let isBasicStringBodyChar: UnsafePointer = { + let ptr = UnsafeMutablePointer.allocate(capacity: 256) + ptr.initialize(repeating: true, count: 256) + ptr[Int(CodeUnits.backslash)] = false + ptr[Int(CodeUnits.doubleQuote)] = false + ptr[Int(CodeUnits.lf)] = false + return UnsafePointer(ptr) + }() + static let null: UTF8.CodeUnit = 0x00 static let unitSeparator: UTF8.CodeUnit = 0x1F static let delete: UTF8.CodeUnit = 0x7F diff --git a/Sources/TOMLDecoder/Parsing/Parser.swift b/Sources/TOMLDecoder/Parsing/Parser.swift index 49e9353..dab1648 100644 --- a/Sources/TOMLDecoder/Parsing/Parser.swift +++ b/Sources/TOMLDecoder/Parsing/Parser.swift @@ -4,7 +4,7 @@ struct Parser: ~Copyable { var currentLineNumber = 1 var currentTable = 0 var currentTableIsKeyed = false - var tablePath: [(key: String, keyHash: Int, token: Token)] = [] + var tablePath: [(key: String, keyHash: Int)] = [] var tables: [InternalTOMLTable] = [InternalTOMLTable()] var arrays: [InternalTOMLArray] = [] var keyTables: [KeyTablePair] = [] @@ -146,16 +146,16 @@ struct Parser: ~Copyable { } func scanString(range: Range, lineNumber: Int) throws(TOMLError) { - let isBareKeyChar = CodeUnits.isBareKeyChar - let isValueChar = CodeUnits.isValueChar let start = range.lowerBound + let end = range.upperBound let head = bytes[start] if (head >= CodeUnits.lowerA && head <= CodeUnits.lowerZ) || (head >= CodeUnits.upperA && head <= CodeUnits.upperZ) || head == CodeUnits.underscore { + let isBareKeyChar = CodeUnits.isBareKeyChar var index = start + 1 - while index < range.upperBound { + while index < end { let ch = bytes[index] if isBareKeyChar[Int(ch)] { index += 1 @@ -167,88 +167,42 @@ struct Parser: ~Copyable { return } - if start + 3 <= range.upperBound, - bytes[start] == CodeUnits.singleQuote, - bytes[start + 1] == CodeUnits.singleQuote, - bytes[start + 2] == CodeUnits.singleQuote - { - var i = start + 3 - var newlinesInToken = 0 + if head == CodeUnits.singleQuote { + if start + 3 <= end, + bytes[start + 1] == CodeUnits.singleQuote, + bytes[start + 2] == CodeUnits.singleQuote + { + var i = start + 3 + var newlinesInToken = 0 - while i < range.upperBound { - if bytes[i] == CodeUnits.lf { - newlinesInToken += 1 - } - if i + 3 <= range.upperBound, - bytes[i] == CodeUnits.singleQuote, - bytes[i + 1] == CodeUnits.singleQuote, - bytes[i + 2] == CodeUnits.singleQuote - { - if i + 3 >= range.upperBound || bytes[i + 3] != CodeUnits.singleQuote { - break + while i < end { + if bytes[i] == CodeUnits.lf { + newlinesInToken += 1 } - } - i += 1 - } - - guard i < range.upperBound else { - throw TOMLError( - .syntax(lineNumber: lineNumber, message: "unterminated triple-s-quote") - ) - } - - let end = i + 3 - emitToken(kind: .string, start: start, end: end, newlines: newlinesInToken) - return - } - - if start + 3 < range.upperBound, - bytes[start] == CodeUnits.doubleQuote, - bytes[start + 1] == CodeUnits.doubleQuote, - bytes[start + 2] == CodeUnits.doubleQuote - { - var i = start + 3 - let textCount = range.upperBound - var newlinesInToken = 0 - - while i < textCount { - if bytes[i] == CodeUnits.lf { - newlinesInToken += 1 - } - if i + 3 <= textCount, - bytes[i] == CodeUnits.doubleQuote, - bytes[i + 1] == CodeUnits.doubleQuote, - bytes[i + 2] == CodeUnits.doubleQuote - { - // Check if this is exactly 3 quotes (not part of a longer sequence) - if i + 3 >= textCount || bytes[i + 3] != CodeUnits.doubleQuote { - if bytes[i - 1] == CodeUnits.backslash { - i += 1 - continue + if i + 3 <= end, + bytes[i] == CodeUnits.singleQuote, + bytes[i + 1] == CodeUnits.singleQuote, + bytes[i + 2] == CodeUnits.singleQuote + { + if i + 3 >= end || bytes[i + 3] != CodeUnits.singleQuote { + break } - break } + i += 1 } - i += 1 - } - guard i < range.upperBound else { - throw TOMLError( - .syntax(lineNumber: lineNumber, message: "unterminated triple-d-quote") - ) - } + guard i < end else { + throw TOMLError( + .syntax(lineNumber: lineNumber, message: "unterminated triple-s-quote") + ) + } - let end = i + 3 - emitToken(kind: .string, start: start, end: end, newlines: newlinesInToken) - return - } + emitToken(kind: .string, start: start, end: i + 3, newlines: newlinesInToken) + return + } - let ch = bytes[start] - if ch == CodeUnits.singleQuote { var i = start + 1 - let textCount = range.upperBound - - while i < textCount { + while i < end { let ch = bytes[i] if ch == CodeUnits.singleQuote || ch == CodeUnits.lf { break @@ -256,7 +210,7 @@ struct Parser: ~Copyable { i += 1 } - if i >= textCount || bytes[i] != CodeUnits.singleQuote { + if i >= end || bytes[i] != CodeUnits.singleQuote { throw TOMLError( .syntax(lineNumber: lineNumber, message: "unterminated s-quote") ) @@ -266,40 +220,76 @@ struct Parser: ~Copyable { return } - if ch == CodeUnits.doubleQuote { - var i = start + 1 + if head == CodeUnits.doubleQuote { + if start + 3 < end, + bytes[start + 1] == CodeUnits.doubleQuote, + bytes[start + 2] == CodeUnits.doubleQuote + { + var i = start + 3 + var newlinesInToken = 0 + + while i < end { + if bytes[i] == CodeUnits.lf { + newlinesInToken += 1 + } + if i + 3 <= end, + bytes[i] == CodeUnits.doubleQuote, + bytes[i + 1] == CodeUnits.doubleQuote, + bytes[i + 2] == CodeUnits.doubleQuote + { + if i + 3 >= end || bytes[i + 3] != CodeUnits.doubleQuote { + if bytes[i - 1] == CodeUnits.backslash { + i += 1 + continue + } + break + } + } + i += 1 + } + + guard i < end else { + throw TOMLError( + .syntax(lineNumber: lineNumber, message: "unterminated triple-d-quote") + ) + } - // 8x unrolling for double-quoted strings - while i + 8 <= range.upperBound { - if bytes[i] == CodeUnits.backslash || bytes[i] == CodeUnits.doubleQuote || bytes[i] == CodeUnits.lf { break } - if bytes[i + 1] == CodeUnits.backslash || bytes[i + 1] == CodeUnits.doubleQuote || bytes[i + 1] == CodeUnits.lf { break } - if bytes[i + 2] == CodeUnits.backslash || bytes[i + 2] == CodeUnits.doubleQuote || bytes[i + 2] == CodeUnits.lf { break } - if bytes[i + 3] == CodeUnits.backslash || bytes[i + 3] == CodeUnits.doubleQuote || bytes[i + 3] == CodeUnits.lf { break } - if bytes[i + 4] == CodeUnits.backslash || bytes[i + 4] == CodeUnits.doubleQuote || bytes[i + 4] == CodeUnits.lf { break } - if bytes[i + 5] == CodeUnits.backslash || bytes[i + 5] == CodeUnits.doubleQuote || bytes[i + 5] == CodeUnits.lf { break } - if bytes[i + 6] == CodeUnits.backslash || bytes[i + 6] == CodeUnits.doubleQuote || bytes[i + 6] == CodeUnits.lf { break } - if bytes[i + 7] == CodeUnits.backslash || bytes[i + 7] == CodeUnits.doubleQuote || bytes[i + 7] == CodeUnits.lf { break } + emitToken(kind: .string, start: start, end: i + 3, newlines: newlinesInToken) + return + } + + var i = start + 1 + let isBasicStringBodyChar = CodeUnits.isBasicStringBodyChar + let baseAddress = bytes.baseAddress! + while i + 8 <= end { + let chunk = UnsafeRawPointer(baseAddress.advanced(by: i)).loadUnaligned( + as: UInt64.self + ) + if chunkContainsDoubleQuotedStopByte(chunk) { + break + } i += 8 } - while i < range.upperBound { + while i < end { let ch = bytes[i] + if isBasicStringBodyChar[Int(ch)] { + i += 1 + continue + } if ch == CodeUnits.backslash { i += 1 - if i < range.upperBound { + if i < end { i += 1 continue } break } - if ch == CodeUnits.lf || ch == CodeUnits.doubleQuote { - break - } - i += 1 + break } - if i >= range.upperBound || bytes[i] != CodeUnits.doubleQuote { + if i >= end || bytes[i] != CodeUnits.doubleQuote { throw TOMLError( .syntax(lineNumber: lineNumber, message: "unterminated quote") ) @@ -313,34 +303,34 @@ struct Parser: ~Copyable { var index = start var dateEnder: Int? // Fast path: Dates must produce YYYY-MM-DD, so checks for the dash - if start + 4 < range.upperBound && bytes[start + 4] == CodeUnits.minus { + if start + 4 < end && bytes[start + 4] == CodeUnits.minus { dateEnder = scanDate(bytes: bytes, range: range)?.3 } - if let dateEnder, dateEnder < range.upperBound, + if let dateEnder, dateEnder < end, bytes[dateEnder] == CodeUnits.upperT || bytes[dateEnder] == CodeUnits.lowerT || bytes[dateEnder] == CodeUnits.space { let timeStarter = dateEnder + 1 if let timeEnder = scanTime( - bytes: bytes, range: timeStarter ..< range.upperBound + bytes: bytes, range: timeStarter ..< end )?.3 { index = timeEnder } } else if let dateEnder { index = dateEnder - } else if start + 2 < range.upperBound, bytes[start + 2] == CodeUnits.colon, + } else if start + 2 < end, bytes[start + 2] == CodeUnits.colon, let timeEnder = scanTime( - bytes: bytes, range: start ..< range.upperBound + bytes: bytes, range: start ..< end )?.3 { index = timeEnder } if index > start { - if index < range.upperBound { + if index < end { if bytes[index] == CodeUnits.dot { index += 1 - while index < range.upperBound, bytes[index] >= CodeUnits.number0, + while index < end, bytes[index] >= CodeUnits.number0, bytes[index] <= CodeUnits.number9 { index += 1 @@ -349,15 +339,13 @@ struct Parser: ~Copyable { if bytes[index] == CodeUnits.upperZ || bytes[index] == CodeUnits.lowerZ { index += 1 } else if let timzoneEnder = scanTimezoneOffset( - bytes: bytes, range: index ..< range.upperBound + bytes: bytes, range: index ..< end ) { index = timzoneEnder } } // squeeze out any spaces at end of string - while index >= start, - bytes[index - 1] == CodeUnits.space - { + while index > start, bytes[index - 1] == CodeUnits.space { index -= 1 } // tokenize @@ -367,25 +355,30 @@ struct Parser: ~Copyable { } if isDotSpecial { + let isBareKeyChar = CodeUnits.isBareKeyChar var index = start - var isValidKey = true - while index < range.upperBound { + while index < end, isBareKeyChar[Int(bytes[index])] { + index += 1 + } + if index >= end || bytes[index] != CodeUnits.plus { + emitToken(kind: .bareKey, start: start, end: index) + return + } + + index += 1 + while index < end { let ch = bytes[index] - if isBareKeyChar[Int(ch)] { - index += 1 - continue - } - if ch == CodeUnits.plus { - isValidKey = false + if isBareKeyChar[Int(ch)] || ch == CodeUnits.plus { index += 1 continue } break } - emitToken(kind: isValidKey ? .bareKey : .string, start: start, end: index) + emitToken(kind: .string, start: start, end: index) } else { + let isValueChar = CodeUnits.isValueChar var index = start - while index < range.upperBound { + while index < end { let ch = bytes[index] if isValueChar[Int(ch)] { index += 1 @@ -413,27 +406,33 @@ struct Parser: ~Copyable { try nextToken(bytes: bytes, isDotSpecial: isDotSpecial) } - mutating func createKeyValue(bytes: UnsafeBufferPointer, token: Token, inTable tableIndex: Int, isKeyed: Bool) throws(TOMLError) -> Int { + mutating func createKeyValue( + bytes: UnsafeBufferPointer, + token: Token, + value: Token, + inTable tableIndex: Int, + isKeyed: Bool + ) throws(TOMLError) { let (key, keyHash) = try normalizeKeyAndHash(bytes: bytes, token: token, keyTransform: keyTransform) if tableValue(tableIndex: tableIndex, keyed: isKeyed, key: key, keyHash: keyHash) != nil { throw TOMLError(.badKey(lineNumber: token.lineNumber)) } - let kv = KeyValuePair(key: key, keyHash: keyHash, value: Token.empty) let index = keyValues.count - keyValues.append(kv) + keyValues.append(KeyValuePair(key: key, keyHash: keyHash, value: value)) if isKeyed { if keyTables[tableIndex].table.keyValues.isEmpty { keyTables[tableIndex].table.keyValues.reserveCapacity(8) } keyTables[tableIndex].table.keyValues.append(index) + keyTables[tableIndex].table.recordKeyHash(keyHash) } else { if tables[tableIndex].keyValues.isEmpty { tables[tableIndex].keyValues.reserveCapacity(8) } tables[tableIndex].keyValues.append(index) + tables[tableIndex].recordKeyHash(keyHash) } - return index } mutating func createKeyTable(bytes: UnsafeBufferPointer, token: Token, inTable tableIndex: Int, isKeyed: Bool, implicit: Bool = false) throws(TOMLError) -> Int { @@ -480,11 +479,13 @@ struct Parser: ~Copyable { keyTables[tableIndex].table.tables.reserveCapacity(8) } keyTables[tableIndex].table.tables.append(index) + keyTables[tableIndex].table.recordKeyHash(keyHash) } else { if tables[tableIndex].tables.isEmpty { tables[tableIndex].tables.reserveCapacity(8) } tables[tableIndex].tables.append(index) + tables[tableIndex].recordKeyHash(keyHash) } return index } @@ -507,17 +508,23 @@ struct Parser: ~Copyable { } let index = keyArrays.count - keyArrays.append(KeyArrayPair(key: key, keyHash: keyHash, array: InternalTOMLArray(kind: kind))) + var array = InternalTOMLArray(kind: kind) + if kind == .table { + array.elements.reserveCapacity(8) + } + keyArrays.append(KeyArrayPair(key: key, keyHash: keyHash, array: array)) if isKeyed { if keyTables[tableIndex].table.arrays.isEmpty { keyTables[tableIndex].table.arrays.reserveCapacity(8) } keyTables[tableIndex].table.arrays.append(index) + keyTables[tableIndex].table.recordKeyHash(keyHash) } else { if tables[tableIndex].arrays.isEmpty { tables[tableIndex].arrays.reserveCapacity(8) } tables[tableIndex].arrays.append(index) + tables[tableIndex].recordKeyHash(keyHash) } return index } @@ -865,9 +872,7 @@ struct Parser: ~Copyable { try nextToken(bytes: bytes, isDotSpecial: false) if token.kind == .string || token.kind == .bareKey { - let index = try createKeyValue(bytes: bytes, token: key, inTable: tableIndex, isKeyed: isKeyed) - let value = token - keyValues[index].value = value + try createKeyValue(bytes: bytes, token: key, value: token, inTable: tableIndex, isKeyed: isKeyed) try nextToken(bytes: bytes, isDotSpecial: false) return } @@ -887,91 +892,227 @@ struct Parser: ~Copyable { throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "syntax error")) } - mutating func fillTablePath(bytes: UnsafeBufferPointer) throws(TOMLError) { + mutating func fillTablePath(bytes: UnsafeBufferPointer, clearPath: Bool = true) throws(TOMLError) -> ( + key: String, + keyHash: Int, + token: Token + ) { let lineNumber = token.lineNumber - tablePath.removeAll(keepingCapacity: true) + if clearPath { + tablePath.removeAll(keepingCapacity: true) + } while true { if token.kind != .string, token.kind != .bareKey { throw TOMLError(.syntax(lineNumber: lineNumber, message: "invalid or missing key")) } + let currentToken = token let (key, keyHash) = try normalizeKeyAndHash(bytes: bytes, token: token, keyTransform: keyTransform) - tablePath.append((key: key, keyHash: keyHash, token: token)) try nextToken(bytes: bytes, isDotSpecial: true) if token.kind == .rbracket { - break + return (key: key, keyHash: keyHash, token: currentToken) } + tablePath.append((key: key, keyHash: keyHash)) + if token.kind != .dot { throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "invalid key")) } try nextToken(bytes: bytes, isDotSpecial: true) } - if tablePath.isEmpty { - throw TOMLError(.syntax(lineNumber: lineNumber, message: "empty table selector")) - } } mutating func parseSelect(bytes: UnsafeBufferPointer) throws(TOMLError) { assert(token.kind == .lbracket) - let index = token.text.lowerBound - let nextIndex = index + 1 - let llb = index < bytes.count - && bytes[index] == CodeUnits.lbracket - && nextIndex < bytes.count - && bytes[nextIndex] == CodeUnits.lbracket - - try eatToken(bytes: bytes, kind: .lbracket, isDotSpecial: true) + let nextIndex = token.text.lowerBound + 1 + let llb = nextIndex < bytes.count && bytes[nextIndex] == CodeUnits.lbracket if llb { + cursor = nextIndex + 1 + try nextToken(bytes: bytes, isDotSpecial: true) + } else { try eatToken(bytes: bytes, kind: .lbracket, isDotSpecial: true) } - try fillTablePath(bytes: bytes) - - // For [x.y.z] or [[x.y.z]], remove z from tpath. - let (lastKey, lastKeyHash, z) = tablePath.removeLast() - try walkTablePath() + if token.kind != .string, token.kind != .bareKey { + throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "invalid or missing key")) + } - if !llb { - // [x.y.z] -> create z = {} in x.y - currentTable = try createKeyTable( - normalizedKey: lastKey, - keyHash: lastKeyHash, - lineNumber: z.lineNumber, - inTable: currentTable, - isKeyed: currentTableIsKeyed - ) - currentTableIsKeyed = true + let firstToken = token + let firstIsBareNoTransform = keyTransform == nil && firstToken.kind == .bareKey + let firstKey: String? + let firstKeyHash: Int + if firstIsBareNoTransform { + firstKey = nil + firstKeyHash = fastKeyHash(bytes: bytes, range: firstToken.text) } else { - // [[x.y.z]] -> create z = [] in x.y - var maybeArrayIndex = lookupArray(in: currentTable, keyed: currentTableIsKeyed, key: lastKey, keyHash: lastKeyHash) - if maybeArrayIndex == nil { - maybeArrayIndex = try createKeyArray( - normalizedKey: lastKey, - keyHash: lastKeyHash, - lineNumber: z.lineNumber, + let normalized = try normalizeKeyAndHash( + bytes: bytes, + token: firstToken, + keyTransform: keyTransform + ) + firstKey = normalized.key + firstKeyHash = normalized.keyHash + } + try nextToken(bytes: bytes, isDotSpecial: true) + + if token.kind == .rbracket { + currentTable = 0 + currentTableIsKeyed = false + if !llb { + let key = firstKey ?? makeString(bytes: bytes, range: firstToken.text) + currentTable = try createKeyTable( + normalizedKey: key, + keyHash: firstKeyHash, + lineNumber: firstToken.lineNumber, inTable: currentTable, - isKeyed: currentTableIsKeyed, - kind: .table + isKeyed: currentTableIsKeyed ) + currentTableIsKeyed = true + } else { + let arrayIndex: Int + if firstIsBareNoTransform { + if let existingArrayIndex = lookupArray( + in: currentTable, + keyed: currentTableIsKeyed, + bytes: bytes, + token: firstToken, + keyHash: firstKeyHash + ) { + arrayIndex = existingArrayIndex + } else { + let key = makeString(bytes: bytes, range: firstToken.text) + arrayIndex = try createKeyArray( + normalizedKey: key, + keyHash: firstKeyHash, + lineNumber: firstToken.lineNumber, + inTable: currentTable, + isKeyed: currentTableIsKeyed, + kind: .table + ) + } + } else { + let key = firstKey! + if let existingArrayIndex = lookupArray( + in: currentTable, + keyed: currentTableIsKeyed, + key: key, + keyHash: firstKeyHash + ) { + arrayIndex = existingArrayIndex + } else { + arrayIndex = try createKeyArray( + normalizedKey: key, + keyHash: firstKeyHash, + lineNumber: firstToken.lineNumber, + inTable: currentTable, + isKeyed: currentTableIsKeyed, + kind: .table + ) + } + } + if keyArrays[arrayIndex].array.kind != .table { + throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "array mismatch")) + } + + let newTableIndex = tables.count + tables.append(InternalTOMLTable()) + keyArrays[arrayIndex].array.elements.append(.table(lineNumber: token.lineNumber, newTableIndex)) + currentTable = newTableIndex + currentTableIsKeyed = false } - let arrayIndex = maybeArrayIndex! - if keyArrays[arrayIndex].array.kind != .table { - throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "array mismatch")) + } else { + if token.kind != .dot { + throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "invalid key")) } - // add to z[] - let newTableIndex = tables.count - tables.append(InternalTOMLTable()) - if keyArrays[arrayIndex].array.elements.isEmpty { - keyArrays[arrayIndex].array.elements.reserveCapacity(8) + let firstPathKey = firstKey ?? makeString(bytes: bytes, range: firstToken.text) + var pathTableIndex = 0 + var pathTableIsKeyed = false + try advanceTablePathSegment( + tableIndex: &pathTableIndex, + isKeyed: &pathTableIsKeyed, + key: firstPathKey, + keyHash: firstKeyHash + ) + + try nextToken(bytes: bytes, isDotSpecial: true) + if token.kind != .string, token.kind != .bareKey { + throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "invalid or missing key")) + } + + let secondToken = token + let (secondKey, secondKeyHash) = try normalizeKeyAndHash( + bytes: bytes, + token: secondToken, + keyTransform: keyTransform + ) + try nextToken(bytes: bytes, isDotSpecial: true) + + let key: String + let keyHash: Int + let keyToken: Token + + if token.kind == .rbracket { + currentTable = pathTableIndex + currentTableIsKeyed = pathTableIsKeyed + key = secondKey + keyHash = secondKeyHash + keyToken = secondToken + } else { + tablePath.removeAll(keepingCapacity: true) + tablePath.append((key: secondKey, keyHash: secondKeyHash)) + if token.kind != .dot { + throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "invalid key")) + } + try nextToken(bytes: bytes, isDotSpecial: true) + + let terminal = try fillTablePath(bytes: bytes, clearPath: false) + key = terminal.key + keyHash = terminal.keyHash + keyToken = terminal.token + try walkTablePath(startTable: pathTableIndex, startKeyed: pathTableIsKeyed) + } + + if !llb { + currentTable = try createKeyTable( + normalizedKey: key, + keyHash: keyHash, + lineNumber: keyToken.lineNumber, + inTable: currentTable, + isKeyed: currentTableIsKeyed + ) + currentTableIsKeyed = true + } else { + let arrayIndex: Int = if let existingArrayIndex = lookupArray( + in: currentTable, + keyed: currentTableIsKeyed, + key: key, + keyHash: keyHash + ) { + existingArrayIndex + } else { + try createKeyArray( + normalizedKey: key, + keyHash: keyHash, + lineNumber: keyToken.lineNumber, + inTable: currentTable, + isKeyed: currentTableIsKeyed, + kind: .table + ) + } + if keyArrays[arrayIndex].array.kind != .table { + throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "array mismatch")) + } + + let newTableIndex = tables.count + tables.append(InternalTOMLTable()) + keyArrays[arrayIndex].array.elements.append(.table(lineNumber: token.lineNumber, newTableIndex)) + currentTable = newTableIndex + currentTableIsKeyed = false } - keyArrays[arrayIndex].array.elements.append(.table(lineNumber: token.lineNumber, newTableIndex)) - currentTable = newTableIndex - currentTableIsKeyed = false } if token.kind != .rbracket { @@ -979,13 +1120,14 @@ struct Parser: ~Copyable { } if llb { - let nextIndex = token.text.index(after: token.text.startIndex) - guard nextIndex < bytes.count, bytes[nextIndex] == CodeUnits.rbracket else { + guard cursor < bytes.count, bytes[cursor] == CodeUnits.rbracket else { throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "expects ]]")) } + cursor += 1 + try nextToken(bytes: bytes, isDotSpecial: true) + } else { try eatToken(bytes: bytes, kind: .rbracket, isDotSpecial: true) } - try eatToken(bytes: bytes, kind: .rbracket, isDotSpecial: true) if token.kind != .newline, token.kind != .eof { throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "extra chars after ] or ]]")) @@ -1768,6 +1910,22 @@ func scanTimezoneOffset(bytes: UnsafeBufferPointer, range: Range) -> return index } +@inline(__always) +private func chunkContainsDoubleQuotedStopByte(_ chunk: UInt64) -> Bool { + let ones: UInt64 = 0x0101_0101_0101_0101 + let highBits: UInt64 = 0x8080_8080_8080_8080 + + @inline(__always) + func hasByte(_ bytePattern: UInt64) -> Bool { + let xor = chunk ^ bytePattern + return ((xor &- ones) & ~xor & highBits) != 0 + } + + return hasByte(0x2222_2222_2222_2222) + || hasByte(0x5C5C_5C5C_5C5C_5C5C) + || hasByte(0x0A0A_0A0A_0A0A_0A0A) +} + func normalizeKeyAndHash(bytes: UnsafeBufferPointer, token: Token, keyTransform: (@Sendable (String) -> String)?) throws(TOMLError) -> (key: String, keyHash: Int) { var start = token.text.lowerBound var end = token.text.upperBound @@ -1824,44 +1982,33 @@ func normalizeKeyAndHash(bytes: UnsafeBufferPointer, token: Token, keyTra @inline(__always) func fastKeyHash(bytes: UnsafeBufferPointer, range: Range) -> Int { - let offsetBasis: UInt64 = 14_695_981_039_346_656_037 - let prime: UInt64 = 1_099_511_628_211 - let count = range.upperBound - range.lowerBound - if count <= 8, let base = bytes.baseAddress { - let start = base.advanced(by: range.lowerBound) - return Int(truncatingIfNeeded: packedKeyHash(UnsafeBufferPointer(start: start, count: count))) + if count == 0 { + return 0 } - - var hash = offsetBasis - var index = range.lowerBound - while index < range.upperBound { - hash ^= UInt64(bytes[index]) - hash &*= prime - index += 1 + let start = bytes.baseAddress!.advanced(by: range.lowerBound) + if count <= 8 { + return Int(truncatingIfNeeded: packedKeyHash(UnsafeBufferPointer(start: start, count: count))) } - return Int(truncatingIfNeeded: hash) + return Int(truncatingIfNeeded: sampledKeyHash(start: start, count: count)) } @inline(__always) func fastKeyHash(_ key: String) -> Int { - let offsetBasis: UInt64 = 14_695_981_039_346_656_037 - let prime: UInt64 = 1_099_511_628_211 - if let hash = key.utf8.withContiguousStorageIfAvailable({ buffer -> UInt64 in if buffer.count <= 8 { return packedKeyHash(buffer) } - var hash = offsetBasis - for byte in buffer { - hash ^= UInt64(byte) - hash &*= prime + if let start = buffer.baseAddress { + return sampledKeyHash(start: start, count: buffer.count) } - return hash + return 0 }) { return Int(truncatingIfNeeded: hash) } + let offsetBasis: UInt64 = 14_695_981_039_346_656_037 + let prime: UInt64 = 1_099_511_628_211 var hash = offsetBasis var packed: UInt64 = 0 var count = 0 @@ -1876,6 +2023,14 @@ func fastKeyHash(_ key: String) -> Int { return Int(truncatingIfNeeded: count <= 8 ? packed : hash) } +@inline(__always) +private func sampledKeyHash(start: UnsafePointer, count: Int) -> UInt64 { + let prefix = packedKeyHash(UnsafeBufferPointer(start: start, count: 8)) + let suffix = packedKeyHash(UnsafeBufferPointer(start: start.advanced(by: count - 8), count: 8)) + let rotatedSuffix = (suffix << 1) | (suffix >> 63) + return prefix ^ rotatedSuffix ^ (UInt64(truncatingIfNeeded: count) &* 0x9E37_79B1_85EB_CA87) +} + @inline(__always) private func packedKeyHash(_ buffer: UnsafeBufferPointer) -> UInt64 { var packed: UInt64 = 0 @@ -1911,11 +2066,11 @@ private func packedKeyHash(_ buffer: UnsafeBufferPointer) -> UInt64 { @inline(__always) private func makeString(bytes: UnsafeBufferPointer, range: Range) -> String { - guard let baseAddress = bytes.baseAddress else { + let count = range.upperBound - range.lowerBound + if count == 0 { return "" } - let start = baseAddress.advanced(by: range.lowerBound) - let count = range.upperBound - range.lowerBound + let start = bytes.baseAddress!.advanced(by: range.lowerBound) return String(decoding: UnsafeBufferPointer(start: start, count: count), as: UTF8.self) } @@ -1929,8 +2084,9 @@ extension Parser { guard let keyValueBase = keyValueBuffer.baseAddress else { return nil } + let indexCount = indices.count var i = 0 - while i < indices.count { + while i < indexCount { let keyValueIndex = indices[i] let keyValuePair = keyValueBase.advanced(by: keyValueIndex).pointee if keyValuePair.keyHash == keyHash, keyValuePair.key == key { @@ -1951,8 +2107,9 @@ extension Parser { guard let keyArrayBase = keyArrayBuffer.baseAddress else { return nil } + let indexCount = indices.count var i = 0 - while i < indices.count { + while i < indexCount { let keyArrayIndex = indices[i] let keyArrayPair = keyArrayBase.advanced(by: keyArrayIndex).pointee if keyArrayPair.keyHash == keyHash, keyArrayPair.key == key { @@ -1964,6 +2121,70 @@ extension Parser { } } + @inline(__always) + func keyMatchesToken(bytes: UnsafeBufferPointer, token: Token, key: borrowing String) -> Bool { + let range = token.text + let count = range.upperBound - range.lowerBound + + if let matches = key.utf8.withContiguousStorageIfAvailable({ keyBuffer -> Bool in + if keyBuffer.count != count { + return false + } + + var keyIndex = 0 + var byteIndex = range.lowerBound + while keyIndex < count { + if keyBuffer[keyIndex] != bytes[byteIndex] { + return false + } + keyIndex += 1 + byteIndex += 1 + } + return true + }) { + return matches + } + + var index = range.lowerBound + for byte in key.utf8 { + if index >= range.upperBound || bytes[index] != byte { + return false + } + index += 1 + } + return index == range.upperBound + } + + @inline(__always) + func matchKeyArray( + in indices: borrowing [Int], + bytes: UnsafeBufferPointer, + token: Token, + keyHash: Int + ) -> Int? { + if indices.isEmpty { + return nil + } + return keyArrays.withUnsafeBufferPointer { keyArrayBuffer -> Int? in + guard let keyArrayBase = keyArrayBuffer.baseAddress else { + return nil + } + let indexCount = indices.count + var i = 0 + while i < indexCount { + let keyArrayIndex = indices[i] + let keyArrayPair = keyArrayBase.advanced(by: keyArrayIndex).pointee + if keyArrayPair.keyHash == keyHash, + keyMatchesToken(bytes: bytes, token: token, key: keyArrayPair.key) + { + return keyArrayIndex + } + i += 1 + } + return nil + } + } + @inline(__always) func matchKeyTable(in indices: borrowing [Int], key: borrowing String, keyHash: Int) -> Int? { if indices.isEmpty { @@ -1973,8 +2194,9 @@ extension Parser { guard let keyTableBase = keyTableBuffer.baseAddress else { return nil } + let indexCount = indices.count var i = 0 - while i < indices.count { + while i < indexCount { let keyTableIndex = indices[i] let keyTablePair = keyTableBase.advanced(by: keyTableIndex).pointee if keyTablePair.keyHash == keyHash, keyTablePair.key == key { @@ -1993,6 +2215,14 @@ extension Parser { key: borrowing String, keyHash: Int ) -> InternalTOMLTable.Value? { + if keyed { + if !keyTables[tableIndex].table.mightContainKeyHash(keyHash) { + return nil + } + } else if !tables[tableIndex].mightContainKeyHash(keyHash) { + return nil + } + if keyed { if let keyValueIndex = matchKeyValue(in: keyTables[tableIndex].table.keyValues, key: key, keyHash: keyHash) { return .keyValue(keyValueIndex) @@ -2026,68 +2256,125 @@ extension Parser { @inline(__always) func lookupTable(in tableIndex: Int, keyed: Bool, key: borrowing String, keyHash: Int) -> Int? { if keyed { + if !keyTables[tableIndex].table.mightContainKeyHash(keyHash) { + return nil + } return matchKeyTable(in: keyTables[tableIndex].table.tables, key: key, keyHash: keyHash) } + if !tables[tableIndex].mightContainKeyHash(keyHash) { + return nil + } return matchKeyTable(in: tables[tableIndex].tables, key: key, keyHash: keyHash) } @inline(__always) func lookupArray(in tableIndex: Int, keyed: Bool, key: borrowing String, keyHash: Int) -> Int? { if keyed { + if !keyTables[tableIndex].table.mightContainKeyHash(keyHash) { + return nil + } return matchKeyArray(in: keyTables[tableIndex].table.arrays, key: key, keyHash: keyHash) } + if !tables[tableIndex].mightContainKeyHash(keyHash) { + return nil + } return matchKeyArray(in: tables[tableIndex].arrays, key: key, keyHash: keyHash) } - mutating func walkTablePath() throws(TOMLError) { - var tableIndex = 0 - var isKeyed = false - for (key, keyHash, _) in tablePath { - switch tableValue(tableIndex: tableIndex, keyed: isKeyed, key: key, keyHash: keyHash) { - case let .table(index): - tableIndex = index - isKeyed = true - case let .array(arrayIndex): - let array = keyArrays[arrayIndex].array - guard case .table = array.kind else { - throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "array element is not a table")) - } + @inline(__always) + func lookupArray( + in tableIndex: Int, + keyed: Bool, + bytes: UnsafeBufferPointer, + token: Token, + keyHash: Int + ) -> Int? { + if keyed { + if !keyTables[tableIndex].table.mightContainKeyHash(keyHash) { + return nil + } + return matchKeyArray( + in: keyTables[tableIndex].table.arrays, + bytes: bytes, + token: token, + keyHash: keyHash + ) + } + if !tables[tableIndex].mightContainKeyHash(keyHash) { + return nil + } + return matchKeyArray( + in: tables[tableIndex].arrays, + bytes: bytes, + token: token, + keyHash: keyHash + ) + } - if array.elements.isEmpty { - throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "empty array")) - } + mutating func advanceTablePathSegment( + tableIndex: inout Int, + isKeyed: inout Bool, + key: String, + keyHash: Int + ) throws(TOMLError) { + switch tableValue(tableIndex: tableIndex, keyed: isKeyed, key: key, keyHash: keyHash) { + case let .table(index): + tableIndex = index + isKeyed = true + case let .array(arrayIndex): + let array = keyArrays[arrayIndex].array + guard case .table = array.kind else { + throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "array element is not a table")) + } - guard case let .table(_, index) = array.elements.last else { - throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "array element is not a table")) - } + if array.elements.isEmpty { + throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "empty array")) + } - tableIndex = index - isKeyed = false - case .keyValue: - throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "key-value already exists")) - default: - let newTableAddress = keyTables.count - var newTable = InternalTOMLTable() - newTable.implicit = true - newTable.definedByDottedKey = false - keyTables.append(KeyTablePair(key: key, keyHash: keyHash, table: newTable)) - - if isKeyed { - if keyTables[tableIndex].table.tables.isEmpty { - keyTables[tableIndex].table.tables.reserveCapacity(8) - } - keyTables[tableIndex].table.tables.append(newTableAddress) - } else { - if tables[tableIndex].tables.isEmpty { - tables[tableIndex].tables.reserveCapacity(8) - } - tables[tableIndex].tables.append(newTableAddress) + guard case let .table(_, index) = array.elements.last else { + throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "array element is not a table")) + } + + tableIndex = index + isKeyed = false + case .keyValue: + throw TOMLError(.syntax(lineNumber: token.lineNumber, message: "key-value already exists")) + default: + let newTableAddress = keyTables.count + var newTable = InternalTOMLTable() + newTable.implicit = true + newTable.definedByDottedKey = false + keyTables.append(KeyTablePair(key: key, keyHash: keyHash, table: newTable)) + + if isKeyed { + if keyTables[tableIndex].table.tables.isEmpty { + keyTables[tableIndex].table.tables.reserveCapacity(8) + } + keyTables[tableIndex].table.tables.append(newTableAddress) + keyTables[tableIndex].table.recordKeyHash(keyHash) + } else { + if tables[tableIndex].tables.isEmpty { + tables[tableIndex].tables.reserveCapacity(8) } - tableIndex = newTableAddress - isKeyed = true + tables[tableIndex].tables.append(newTableAddress) + tables[tableIndex].recordKeyHash(keyHash) } + tableIndex = newTableAddress + isKeyed = true } + } + mutating func walkTablePath(startTable: Int = 0, startKeyed: Bool = false) throws(TOMLError) { + var tableIndex = startTable + var isKeyed = startKeyed + for (key, keyHash) in tablePath { + try advanceTablePathSegment( + tableIndex: &tableIndex, + isKeyed: &isKeyed, + key: key, + keyHash: keyHash + ) + } currentTable = tableIndex currentTableIsKeyed = isKeyed } diff --git a/Sources/TOMLDecoder/Parsing/TOMLDocument.swift b/Sources/TOMLDecoder/Parsing/TOMLDocument.swift index ce77a20..189b94c 100644 --- a/Sources/TOMLDecoder/Parsing/TOMLDocument.swift +++ b/Sources/TOMLDecoder/Parsing/TOMLDocument.swift @@ -120,10 +120,22 @@ struct InternalTOMLTable: Equatable, Sendable { var implicit: Bool = false var readOnly: Bool = false var definedByDottedKey: Bool = false + var keyHashBloom: UInt64 = 0 var keyValues: [Int] = [] var arrays: [Int] = [] var tables: [Int] = [] + @inline(__always) + mutating func recordKeyHash(_ keyHash: Int) { + keyHashBloom |= keyHashBloomMask(keyHash) + } + + @inline(__always) + func mightContainKeyHash(_ keyHash: Int) -> Bool { + let mask = keyHashBloomMask(keyHash) + return (keyHashBloom & mask) == mask + } + func allKeys(_ document: TOMLDocument) -> [String] { var keys = [String]() for kv in keyValues { @@ -168,6 +180,14 @@ struct InternalTOMLTable: Equatable, Sendable { } } +@inline(__always) +private func keyHashBloomMask(_ keyHash: Int) -> UInt64 { + let raw = UInt(bitPattern: keyHash) + let bit0 = UInt64(1) << UInt64(raw & 63) + let bit1 = UInt64(1) << UInt64((raw >> 6) & 63) + return bit0 | bit1 +} + struct DateTimeComponents: Equatable { let date: LocalDate? let time: LocalTime?