diff --git a/src/codegen/types/objects/regex.ts b/src/codegen/types/objects/regex.ts
index 6fb18533..99178419 100644
--- a/src/codegen/types/objects/regex.ts
+++ b/src/codegen/types/objects/regex.ts
@@ -50,15 +50,83 @@ export class RegexGenerator {
     return hi + lo;
   }
 
+  /**
+   * Expands JavaScript shorthand classes (\d, \D, \w, \W, \s, \S) into POSIX
+   * bracket syntax; every other escape sequence is copied through unchanged.
+   * Inside an existing [...] only the class members are emitted; elsewhere a
+   * complete bracket expression is emitted.
+   *
+   * NOTE: a negated shorthand inside a bracket (e.g. [a\D]) emits a "^" that
+   * only negates when it lands first in the bracket; such patterns are not
+   * translated faithfully.
+   *
+   * @param pattern JavaScript regex source text
+   * @returns the pattern with shorthand classes expanded
+   */
+  private translateJSPatternToPOSIX(pattern: string): string {
+    let result = "";
+    let inBracket = false;
+    for (let i = 0; i < pattern.length; i++) {
+      const ch = pattern[i];
+      if (ch === "\\" && i + 1 < pattern.length) {
+        const next = pattern[i + 1];
+        if (next === "d") {
+          result += inBracket ? "0-9" : "[0-9]";
+          i++;
+          continue;
+        }
+        if (next === "D") {
+          result += inBracket ? "^0-9" : "[^0-9]";
+          i++;
+          continue;
+        }
+        if (next === "w") {
+          result += inBracket ? "a-zA-Z0-9_" : "[a-zA-Z0-9_]";
+          i++;
+          continue;
+        }
+        if (next === "W") {
+          result += inBracket ? "^a-zA-Z0-9_" : "[^a-zA-Z0-9_]";
+          i++;
+          continue;
+        }
+        // POSIX brackets treat backslash literally, so tab/newline/CR cannot
+        // be spelled as "\t\n\r" there; use the [:space:] class instead.
+        if (next === "s") {
+          result += inBracket ? "[:space:]" : "[[:space:]]";
+          i++;
+          continue;
+        }
+        if (next === "S") {
+          result += inBracket ? "^[:space:]" : "[^[:space:]]";
+          i++;
+          continue;
+        }
+        result += ch;
+        result += next;
+        i++;
+        continue;
+      }
+      if (ch === "[" && !inBracket) {
+        inBracket = true;
+      } else if (ch === "]" && inBracket) {
+        inBracket = false;
+      }
+      result += ch;
+    }
+    return result;
+  }
+
   // Compile a regex pattern and return a pointer to the compiled regex
   // Returns a pointer to regex_t struct (i8*)
   generateRegexCompile(pattern: string, flags: string): string {
     this.ctx.setUsesRegex(true);
+    const posixPattern = this.translateJSPatternToPOSIX(pattern);
     let escaped = "";
     let byteCount = 0;
-    for (let i = 0; i < pattern.length; i++) {
-      const ch = pattern[i];
-      const code = pattern.charCodeAt(i);
+    for (let i = 0; i < posixPattern.length; i++) {
+      const ch = posixPattern[i];
+      const code = posixPattern.charCodeAt(i);
       if (ch === "\\") {
         escaped += "\\5C";
         byteCount += 1;
diff --git a/tests/fixtures/regex/regex-character-classes.ts b/tests/fixtures/regex/regex-character-classes.ts
new file mode 100644
index 00000000..92ca34c9
--- /dev/null
+++ b/tests/fixtures/regex/regex-character-classes.ts
@@ -0,0 +1,64 @@
+function testCharacterClasses(): void {
+  const str = "hello 123 world";
+
+  const digitMatch = str.match(/(\d+)/);
+  if (digitMatch === null) {
+    console.log("FAIL: \\d should match digits");
+    process.exit(1);
+  }
+  if (digitMatch[0] !== "123") {
+    console.log("FAIL: \\d match[0] expected 123, got " + digitMatch[0]);
+    process.exit(1);
+  }
+
+  const wordMatch = str.match(/(\w+)/);
+  if (wordMatch === null) {
+    console.log("FAIL: \\w should match word chars");
+    process.exit(1);
+  }
+  if (wordMatch[0] !== "hello") {
+    console.log("FAIL: \\w match[0] expected hello, got " + wordMatch[0]);
+    process.exit(1);
+  }
+
+  const spaceMatch = str.match(/(\s+)/);
+  if (spaceMatch === null) {
+    console.log("FAIL: \\s should match whitespace");
+    process.exit(1);
+  }
+  if (spaceMatch[0] !== " ") {
+    console.log("FAIL: \\s match[0] expected space, got '" + spaceMatch[0] + "'");
+    process.exit(1);
+  }
+
+  const nonDigitMatch = str.match(/(\D+)/);
+  if (nonDigitMatch === null || nonDigitMatch[0] !== "hello ") {
+    console.log("FAIL: \\D should match non-digits");
+    process.exit(1);
+  }
+
+  const nonWordMatch = str.match(/(\W+)/);
+  if (nonWordMatch === null || nonWordMatch[0] !== " ") {
+    console.log("FAIL: \\W should match non-word chars");
+    process.exit(1);
+  }
+
+  const nonSpaceMatch = str.match(/(\S+)/);
+  if (nonSpaceMatch === null || nonSpaceMatch[0] !== "hello") {
+    console.log("FAIL: \\S should match non-space");
+    process.exit(1);
+  }
+
+  if (!/\d{3}-\d{4}/.test("555-1234")) {
+    console.log("FAIL: phone pattern should match");
+    process.exit(1);
+  }
+
+  if (/\d+/.test("no digits here")) {
+    console.log("FAIL: \\d should not match letters");
+    process.exit(1);
+  }
+
+  console.log("TEST_PASSED");
+}
+testCharacterClasses();