diff --git a/.jules/bolt.md b/.jules/bolt.md index 8dd2572..1ec4f4e 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -21,3 +21,7 @@ ## 2026-03-30 - [Tqdm Iterator Wrapping Overhead] **Learning:** Manually updating a `tqdm` progress bar with `pbar.update(1)` inside a high-iteration concurrent loop (like iterating over `concurrent.futures.as_completed()`) introduces unnecessary context manager and function call overhead on the main thread, blocking rapid iterator consumption. **Action:** When tracking progress over an iterator, wrap the iterator directly with `tqdm(iterator, total=N)` instead of using a `with tqdm` block and manual `update()` calls. This delegates progress tracking to `tqdm`'s optimized internal iteration logic, yielding significantly (~20%) faster loop execution. + +## 2026-04-03 - [Object Parsing Overhead in High Concurrency] +**Learning:** Calling `ipaddress.ip_address()` repeatedly on string representations inside a concurrent worker loop incurs unnecessary CPU overhead. Even though each string-to-IP-object conversion takes mere microseconds, the cumulative cost across thousands of concurrent operations creates a noticeable slowdown. +**Action:** When the main thread generates parameters for worker threads and the objects are already instantiated (or can easily be instantiated during generation), pass the objects themselves to the worker threads instead of their string forms. Use an `isinstance` fast-path inside the worker thread function to avoid redundant instantiation, significantly reducing parsing overhead in the concurrent loop. 
diff --git a/testping1.py b/testping1.py index 786e20a..cbe6aa2 100644 --- a/testping1.py +++ b/testping1.py @@ -37,16 +37,21 @@ def is_reachable(ip, timeout=1): logging.error("IP address string too long") return False - # 🛡️ Sentinel: Validate IP address to prevent argument injection - # Catch TypeError alongside ValueError as ipaddress.ip_address() - # raises TypeError when passed None or non-string/int objects, - # which can crash the worker thread pool (DoS) if unhandled. - try: - ip_obj = ipaddress.ip_address(ip) - except (ValueError, TypeError): - # 🛡️ Sentinel: Sanitize log input to prevent CRLF/Log Injection - logging.error(f"Invalid IP address format: {repr(ip)}") - return False + # ⚡ Bolt: Fast-path for pre-instantiated IP objects to avoid redundant string parsing + # overhead. Avoids calling ipaddress.ip_address() for every ip. + if isinstance(ip, (ipaddress.IPv4Address, ipaddress.IPv6Address)): + ip_obj = ip + else: + # 🛡️ Sentinel: Validate IP address to prevent argument injection + # Catch TypeError alongside ValueError as ipaddress.ip_address() + # raises TypeError when passed None or non-string/int objects, + # which can crash the worker thread pool (DoS) if unhandled. + try: + ip_obj = ipaddress.ip_address(ip) + except (ValueError, TypeError): + # 🛡️ Sentinel: Sanitize log input to prevent CRLF/Log Injection + logging.error(f"Invalid IP address format: {repr(ip)}") + return False # 🛡️ Sentinel: Prevent Server-Side Request Forgery (SSRF) # Block loopback, link-local, multicast, unspecified, and reserved addresses from being pinged. @@ -142,7 +147,8 @@ def is_reachable(ip, timeout=1): # Using .compressed instead of str() further avoids overhead, yielding ~15-20% faster generation. 
base_int = int(start_obj) ip_class = type(start_obj) - ips_to_scan = [ip_class(base_int + i).compressed for i in range(total_ips)] + # ⚡ Bolt: Pass pre-instantiated IP objects to worker threads to avoid string parsing overhead + ips_to_scan = [ip_class(base_int + i) for i in range(total_ips)] # ⚡ Bolt: Parallelize network scanning using ThreadPoolExecutor # Reduces scan time significantly by performing pings concurrently instead of sequentially.