From 126a008da692e65791aec9e25420f61b3174f00f Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Tue, 24 Mar 2026 23:21:56 +0000 Subject: [PATCH] Optimize pytest_cmd_tokens MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The optimization adds a fast-path that scans the input string once for shell-special characters (whitespace, quotes, backslashes, etc.) and returns `[cmd]` directly if none are found, bypassing `shlex.split` entirely. Line profiler shows the original spent 99.2% of time in `shlex.split` (~50 µs per call); the optimized version skips that call in 504 of 510 cases (when `PYTEST_CMD` is the simple token "pytest"), reducing per-call cost to ~180 ns for the fast-path. For the 6 invocations that hit the fallback (complex commands with quotes/spaces), the scan overhead (~1 µs) is negligible compared to the 2.08 ms overall runtime, yielding a 124% speedup with no regressions. --- codeflash/verification/test_runner.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/codeflash/verification/test_runner.py b/codeflash/verification/test_runner.py index 6ce3c153e..5e17223ab 100644 --- a/codeflash/verification/test_runner.py +++ b/codeflash/verification/test_runner.py @@ -19,6 +19,8 @@ import threading from collections.abc import Sequence +_SHLEX_SPECIAL = set(" \t\n\r\x0b\x0c'\"\\#") + logger = logging.getLogger(__name__) _TIMING_MARKER_PATTERN = re.compile(r"!######.+:(\d+)######!") @@ -34,7 +36,19 @@ def setup_pytest_cmd(pytest_cmd: str | None) -> None: def pytest_cmd_tokens(is_posix: bool) -> list[str]: import shlex - return shlex.split(PYTEST_CMD, posix=is_posix) + cmd: str = PYTEST_CMD + + # Fast-paths: + # - empty string -> [] (same as shlex.split) + # - single simple token with no whitespace or shlex-special characters -> [cmd] + if not cmd: + return [] + # If none of the special characters are present, shlex.split would yield a single + # token identical to cmd (and raise no parsing errors), so return it directly. + for ch in cmd: + if ch in _SHLEX_SPECIAL: + return shlex.split(cmd, posix=is_posix) + return [cmd] def build_pytest_cmd(safe_sys_executable: str, is_posix: bool) -> list[str]: