From 1452e67f6ec81a5bce6070f0ae7e1bb5af0a7e9b Mon Sep 17 00:00:00 2001
From: John Chrostek
Date: Wed, 11 Mar 2026 11:09:24 -0400
Subject: [PATCH] fix(http): disable connection pooling to prevent stale connections in Lambda
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Problem

After upgrading from extension v92 to v93, customers reported a sharp increase in "Max retries exceeded, returning request error" errors (SVLS-8672, GitHub issue #1092).

## Root Cause

PR #1018 introduced HTTP client caching for performance improvements. However, the cached client maintains a connection pool that doesn't work well with Lambda's freeze/resume execution model:

1. Lambda executes, HTTP client created with connection pool
2. Extension flushes traces, connections remain open in pool
3. Lambda freezes (paused between invocations - seconds to minutes)
4. Lambda resumes, cached client reuses stale connections
5. TCP errors → "Max retries exceeded"

In v92, a new HTTP client was created per-flush, so there were never stale connections to reuse.

## Solution

Disable connection pooling by setting `pool_max_idle_per_host(0)`. This ensures each request gets a fresh connection, avoiding stale connection issues while still benefiting from client caching.

This matches the pattern used in libdatadog's `new_client_periodic()` which explicitly disables pooling with the comment:

"This client does not keep connections because otherwise we would get a pipe closed every second connection because of low keep alive in the agent."
Fixes: SVLS-8672 Fixes: https://github.com/DataDog/datadog-lambda-extension/issues/1092 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- bottlecap/src/traces/http_client.rs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/bottlecap/src/traces/http_client.rs b/bottlecap/src/traces/http_client.rs index b0107808e..5a9466214 100644 --- a/bottlecap/src/traces/http_client.rs +++ b/bottlecap/src/traces/http_client.rs @@ -173,7 +173,14 @@ pub fn create_client( let proxy = hyper_http_proxy::Proxy::new(hyper_http_proxy::Intercept::Https, proxy.parse()?); let proxy_connector = hyper_http_proxy::ProxyConnector::from_proxy(connector, proxy)?; - let client = http_common::client_builder().build(proxy_connector); + // Disable connection pooling to avoid stale connections after Lambda freeze/resume. + // In Lambda, the execution environment can be frozen for seconds to minutes between + // invocations. Pooled connections become stale during this time, causing failures + // when reused. Setting pool_max_idle_per_host(0) ensures each request gets a fresh + // connection, matching the pattern used in libdatadog's new_client_periodic(). + let client = http_common::client_builder() + .pool_max_idle_per_host(0) + .build(proxy_connector); debug!( "HTTP_CLIENT | Proxy connector created with proxy: {:?}", proxy_https @@ -181,6 +188,10 @@ pub fn create_client( Ok(client) } else { let proxy_connector = hyper_http_proxy::ProxyConnector::new(connector)?; - Ok(http_common::client_builder().build(proxy_connector)) + // Disable connection pooling to avoid stale connections after Lambda freeze/resume. + // See comment above for detailed explanation. + Ok(http_common::client_builder() + .pool_max_idle_per_host(0) + .build(proxy_connector)) } }