From 5104baab87d03e6dc5cd7d00b0278aca8bed47a0 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Sat, 25 Oct 2025 21:19:13 +0530 Subject: [PATCH 1/6] Lint fix --- .../Drivers/Databricks/RetryHttpHandler.cs | 84 ++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs index 0f7f0b39e4..725d3fe400 100644 --- a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs +++ b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs @@ -16,11 +16,13 @@ */ using System; +using System.Diagnostics; using System.Net; using System.Net.Http; using System.Threading; using System.Threading.Tasks; using System.IO; +using Apache.Arrow.Adbc.Tracing; namespace Apache.Arrow.Adbc.Drivers.Databricks { @@ -52,6 +54,8 @@ protected override async Task SendAsync( HttpRequestMessage request, CancellationToken cancellationToken) { + Activity? activity = Activity.Current; + // Clone the request content if it's not null so we can reuse it for retries var requestContentClone = request.Content != null ? await CloneHttpContentAsync(request.Content) @@ -77,11 +81,35 @@ protected override async Task SendAsync( // If it's not a retryable status code, return immediately if (!IsRetryableStatusCode(response.StatusCode)) { - return response; + // Only log retry summary if retries occurred + if (attemptCount > 0) + { + activity?.SetTag("http.retry.total_attempts", attemptCount); + activity?.SetTag("http.retry.total_wait_seconds", totalRetrySeconds); + activity?.SetTag("http.retry.total_elapsed_seconds", + (DateTime.UtcNow - startTime).TotalSeconds); + activity?.SetTag("http.response.status_code", (int)response.StatusCode); + } + return response; // Clean exit - no logging for normal success + } + + // Retry path - log details when we first enter retry path + if (attemptCount == 0) + { + activity?.SetTag("http.retry.max_timeout_seconds", _retryTimeoutSeconds); + activity?.SetTag("http.request.uri", SanitizeUri(request.RequestUri)); + activity?.AddEvent("http.retry.start", [ + new("status_code", (int)response.StatusCode) + ]); } attemptCount++; + activity?.AddEvent("http.retry.attempt_failed", [ + new("attempt_number", attemptCount), + new("status_code", (int)response.StatusCode) + ]); + // Check if we've exceeded the timeout TimeSpan elapsedTime = DateTime.UtcNow - startTime; if (_retryTimeoutSeconds > 0 && elapsedTime.TotalSeconds > _retryTimeoutSeconds) @@ -101,12 +129,26 @@ protected override async Task SendAsync( { // Use the Retry-After value waitSeconds = retryAfterSeconds; + activity?.AddEvent("http.retry.using_server_retry_after", [ + new("wait_seconds", waitSeconds), + new("attempt_number", attemptCount) + ]); + activity?.SetTag("http.retry.backoff_strategy", "server_retry_after"); lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Using server-specified retry after {waitSeconds} seconds. Attempt {attemptCount}."; } else { // Invalid Retry-After value, use exponential backoff waitSeconds = CalculateBackoffWithJitter(currentBackoffSeconds); + activity?.AddEvent("http.retry.invalid_retry_after_header", [ + new("retry_after_value", retryAfterValue), + new("fallback_strategy", "exponential_backoff") + ]); + activity?.AddEvent("http.retry.using_exponential_backoff", [ + new("wait_seconds", waitSeconds), + new("attempt_number", attemptCount) + ]); + activity?.SetTag("http.retry.backoff_strategy", "exponential"); lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Invalid Retry-After header, using exponential backoff of {waitSeconds} seconds. Attempt {attemptCount}."; } } @@ -114,6 +156,11 @@ protected override async Task SendAsync( { // No Retry-After header, use exponential backoff waitSeconds = CalculateBackoffWithJitter(currentBackoffSeconds); + activity?.AddEvent("http.retry.using_exponential_backoff", [ + new("wait_seconds", waitSeconds), + new("attempt_number", attemptCount) + ]); + activity?.SetTag("http.retry.backoff_strategy", "exponential"); lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Using exponential backoff of {waitSeconds} seconds. Attempt {attemptCount}."; } @@ -131,19 +178,44 @@ protected override async Task SendAsync( break; } + activity?.AddEvent("http.retry.waiting", [ + new("wait_seconds", waitSeconds), + new("attempt_number", attemptCount) + ]); + // Wait for the calculated time await Task.Delay(TimeSpan.FromSeconds(waitSeconds), cancellationToken); + // Update metrics tags + activity?.SetTag("http.retry.attempt_count", attemptCount); + activity?.SetTag("http.retry.total_wait_seconds", totalRetrySeconds); + // Increase backoff for next attempt (exponential backoff) currentBackoffSeconds = Math.Min(currentBackoffSeconds * 2, _maxBackoffSeconds); } while (!cancellationToken.IsCancellationRequested); + // Set final summary tags (only reached if retries exhausted) + activity?.SetTag("http.retry.total_attempts", attemptCount); + activity?.SetTag("http.retry.total_elapsed_seconds", + (DateTime.UtcNow - startTime).TotalSeconds); + activity?.SetTag("http.response.status_code", (int)response.StatusCode); + // If we get here, we've either exceeded the timeout or been cancelled if (cancellationToken.IsCancellationRequested) { + activity?.AddEvent("http.retry.cancelled", [ + new("total_attempts", attemptCount) + ]); + activity?.SetTag("http.retry.outcome", "cancelled"); throw new OperationCanceledException("Request cancelled during retry wait", cancellationToken); } + // Timeout exceeded + activity?.AddEvent("http.retry.timeout_exceeded", [ + new("total_attempts", attemptCount), + new("configured_timeout_seconds", _retryTimeoutSeconds) + ]); + activity?.SetTag("http.retry.outcome", "timeout_exceeded"); throw new DatabricksException(lastErrorMessage ?? "Service temporarily unavailable and retry timeout exceeded", AdbcStatusCode.IOError) .SetSqlState("08001"); } @@ -191,5 +263,15 @@ private static async Task CloneHttpContentAsync(HttpContent content } return clone; } + + /// + /// Sanitizes a URI for logging by removing query parameters and sensitive information. + /// + private static string SanitizeUri(Uri? uri) + { + if (uri == null) return "(null)"; + // Return only scheme, host, port, and path + return $"{uri.Scheme}://{uri.Host}{(uri.IsDefaultPort ? "" : $":{uri.Port}")}{uri.AbsolutePath}"; + } } } From c77d024f8fef5bf5ebf4db09f64d4134ba7f1fec Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Sun, 26 Oct 2025 00:57:29 +0530 Subject: [PATCH 2/6] Minimised logging for the retry handlers --- .../Drivers/Databricks/RetryHttpHandler.cs | 79 ++----------------- 1 file changed, 6 insertions(+), 73 deletions(-) diff --git a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs index 725d3fe400..35a715cae4 100644 --- a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs +++ b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs @@ -63,7 +63,6 @@ protected override async Task SendAsync( HttpResponseMessage response; string? lastErrorMessage = null; - DateTime startTime = DateTime.UtcNow; int attemptCount = 0; int currentBackoffSeconds = _initialBackoffSeconds; int totalRetrySeconds = 0; @@ -85,34 +84,15 @@ protected override async Task SendAsync( if (attemptCount > 0) { activity?.SetTag("http.retry.total_attempts", attemptCount); - activity?.SetTag("http.retry.total_wait_seconds", totalRetrySeconds); - activity?.SetTag("http.retry.total_elapsed_seconds", - (DateTime.UtcNow - startTime).TotalSeconds); activity?.SetTag("http.response.status_code", (int)response.StatusCode); } - return response; // Clean exit - no logging for normal success - } - - // Retry path - log details when we first enter retry path - if (attemptCount == 0) - { - activity?.SetTag("http.retry.max_timeout_seconds", _retryTimeoutSeconds); - activity?.SetTag("http.request.uri", SanitizeUri(request.RequestUri)); - activity?.AddEvent("http.retry.start", [ - new("status_code", (int)response.StatusCode) - ]); + return response; } attemptCount++; - activity?.AddEvent("http.retry.attempt_failed", [ - new("attempt_number", attemptCount), - new("status_code", (int)response.StatusCode) - ]); - - // Check if we've exceeded the timeout - TimeSpan elapsedTime = DateTime.UtcNow - startTime; - if (_retryTimeoutSeconds > 0 && elapsedTime.TotalSeconds > _retryTimeoutSeconds) + // Check if we've exceeded the total wait time + if (_retryTimeoutSeconds > 0 && totalRetrySeconds > _retryTimeoutSeconds) { // We've exceeded the timeout, so break out of the loop break; @@ -129,39 +109,20 @@ protected override async Task SendAsync( { // Use the Retry-After value waitSeconds = retryAfterSeconds; - activity?.AddEvent("http.retry.using_server_retry_after", [ - new("wait_seconds", waitSeconds), - new("attempt_number", attemptCount) - ]); - activity?.SetTag("http.retry.backoff_strategy", "server_retry_after"); - lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Using server-specified retry after {waitSeconds} seconds. Attempt {attemptCount}."; + lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Attempt {attemptCount}."; } else { // Invalid Retry-After value, use exponential backoff waitSeconds = CalculateBackoffWithJitter(currentBackoffSeconds); - activity?.AddEvent("http.retry.invalid_retry_after_header", [ - new("retry_after_value", retryAfterValue), - new("fallback_strategy", "exponential_backoff") - ]); - activity?.AddEvent("http.retry.using_exponential_backoff", [ - new("wait_seconds", waitSeconds), - new("attempt_number", attemptCount) - ]); - activity?.SetTag("http.retry.backoff_strategy", "exponential"); - lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Invalid Retry-After header, using exponential backoff of {waitSeconds} seconds. Attempt {attemptCount}."; + lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Attempt {attemptCount}."; } } else { // No Retry-After header, use exponential backoff waitSeconds = CalculateBackoffWithJitter(currentBackoffSeconds); - activity?.AddEvent("http.retry.using_exponential_backoff", [ - new("wait_seconds", waitSeconds), - new("attempt_number", attemptCount) - ]); - activity?.SetTag("http.retry.backoff_strategy", "exponential"); - lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Using exponential backoff of {waitSeconds} seconds. Attempt {attemptCount}."; + lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Attempt {attemptCount}."; } // Dispose the response before retrying @@ -178,43 +139,24 @@ protected override async Task SendAsync( break; } - activity?.AddEvent("http.retry.waiting", [ - new("wait_seconds", waitSeconds), - new("attempt_number", attemptCount) - ]); - // Wait for the calculated time await Task.Delay(TimeSpan.FromSeconds(waitSeconds), cancellationToken); - // Update metrics tags - activity?.SetTag("http.retry.attempt_count", attemptCount); - activity?.SetTag("http.retry.total_wait_seconds", totalRetrySeconds); - // Increase backoff for next attempt (exponential backoff) currentBackoffSeconds = Math.Min(currentBackoffSeconds * 2, _maxBackoffSeconds); } while (!cancellationToken.IsCancellationRequested); - // Set final summary tags (only reached if retries exhausted) activity?.SetTag("http.retry.total_attempts", attemptCount); - activity?.SetTag("http.retry.total_elapsed_seconds", - (DateTime.UtcNow - startTime).TotalSeconds); activity?.SetTag("http.response.status_code", (int)response.StatusCode); // If we get here, we've either exceeded the timeout or been cancelled if (cancellationToken.IsCancellationRequested) { - activity?.AddEvent("http.retry.cancelled", [ - new("total_attempts", attemptCount) - ]); activity?.SetTag("http.retry.outcome", "cancelled"); throw new OperationCanceledException("Request cancelled during retry wait", cancellationToken); } // Timeout exceeded - activity?.AddEvent("http.retry.timeout_exceeded", [ - new("total_attempts", attemptCount), - new("configured_timeout_seconds", _retryTimeoutSeconds) - ]); activity?.SetTag("http.retry.outcome", "timeout_exceeded"); throw new DatabricksException(lastErrorMessage ?? "Service temporarily unavailable and retry timeout exceeded", AdbcStatusCode.IOError) .SetSqlState("08001"); @@ -264,14 +206,5 @@ private static async Task CloneHttpContentAsync(HttpContent content return clone; } - /// - /// Sanitizes a URI for logging by removing query parameters and sensitive information. - /// - private static string SanitizeUri(Uri? uri) - { - if (uri == null) return "(null)"; - // Return only scheme, host, port, and path - return $"{uri.Scheme}://{uri.Host}{(uri.IsDefaultPort ? "" : $":{uri.Port}")}{uri.AbsolutePath}"; - } } } From 165bb89e81856db98c2ffa13a2e90e22c3494773 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Mon, 27 Oct 2025 01:20:17 +0530 Subject: [PATCH 3/6] Corrected datetime logic --- csharp/src/Drivers/Databricks/RetryHttpHandler.cs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs index 35a715cae4..16dbbeb0f4 100644 --- a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs +++ b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs @@ -22,7 +22,7 @@ using System.Threading; using System.Threading.Tasks; using System.IO; -using Apache.Arrow.Adbc.Tracing; + namespace Apache.Arrow.Adbc.Drivers.Databricks { @@ -63,6 +63,7 @@ protected override async Task SendAsync( HttpResponseMessage response; string? lastErrorMessage = null; + DateTime startTime = DateTime.UtcNow; int attemptCount = 0; int currentBackoffSeconds = _initialBackoffSeconds; int totalRetrySeconds = 0; @@ -91,8 +92,9 @@ protected override async Task SendAsync( attemptCount++; - // Check if we've exceeded the total wait time - if (_retryTimeoutSeconds > 0 && totalRetrySeconds > _retryTimeoutSeconds) + // Check if we've exceeded the timeout + TimeSpan elapsedTime = DateTime.UtcNow - startTime; + if (_retryTimeoutSeconds > 0 && elapsedTime.TotalSeconds > _retryTimeoutSeconds) { // We've exceeded the timeout, so break out of the loop break; @@ -109,20 +111,20 @@ protected override async Task SendAsync( { // Use the Retry-After value waitSeconds = retryAfterSeconds; - lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Attempt {attemptCount}."; + lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Using server-specified retry after {waitSeconds} seconds. Attempt {attemptCount}."; } else { // Invalid Retry-After value, use exponential backoff waitSeconds = CalculateBackoffWithJitter(currentBackoffSeconds); - lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Attempt {attemptCount}."; + lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Invalid Retry-After header, using exponential backoff of {waitSeconds} seconds. Attempt {attemptCount}."; } } else { // No Retry-After header, use exponential backoff waitSeconds = CalculateBackoffWithJitter(currentBackoffSeconds); - lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Attempt {attemptCount}."; + lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Using exponential backoff of {waitSeconds} seconds. Attempt {attemptCount}."; } // Dispose the response before retrying From df2f92d43ad0383a02855d5276c2dcf22757074b Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Mon, 27 Oct 2025 01:22:10 +0530 Subject: [PATCH 4/6] Lint fix --- csharp/src/Drivers/Databricks/RetryHttpHandler.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs index 16dbbeb0f4..4abc9c4ba2 100644 --- a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs +++ b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs @@ -23,7 +23,6 @@ using System.Threading.Tasks; using System.IO; - namespace Apache.Arrow.Adbc.Drivers.Databricks { /// @@ -207,6 +206,5 @@ private static async Task CloneHttpContentAsync(HttpContent content } return clone; } - } } From 0268e15f8e82a11455a8779131725051e1a679d2 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Mon, 27 Oct 2025 10:52:55 +0530 Subject: [PATCH 5/6] Added logs for each retry attempt --- .../Drivers/Databricks/RetryHttpHandler.cs | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs index 4abc9c4ba2..a7aa0f8c21 100644 --- a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs +++ b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs @@ -91,6 +91,9 @@ protected override async Task SendAsync( attemptCount++; + activity?.SetTag("http.retry.attempt", attemptCount); + activity?.SetTag("http.response.status_code", (int)response.StatusCode); + // Check if we've exceeded the timeout TimeSpan elapsedTime = DateTime.UtcNow - startTime; if (_retryTimeoutSeconds > 0 && elapsedTime.TotalSeconds > _retryTimeoutSeconds) @@ -154,13 +157,23 @@ protected override async Task SendAsync( if (cancellationToken.IsCancellationRequested) { activity?.SetTag("http.retry.outcome", "cancelled"); - throw new OperationCanceledException("Request cancelled during retry wait", cancellationToken); + var cancelEx = new OperationCanceledException("Request cancelled during retry wait", cancellationToken); + activity?.AddException(cancelEx, [ + new("error.context", "http.retry.cancelled"), + new("attempts", attemptCount) + ]); } // Timeout exceeded activity?.SetTag("http.retry.outcome", "timeout_exceeded"); - throw new DatabricksException(lastErrorMessage ?? "Service temporarily unavailable and retry timeout exceeded", AdbcStatusCode.IOError) - .SetSqlState("08001"); + var exception = new DatabricksException(lastErrorMessage ?? "Service temporarily unavailable and retry timeout exceeded", AdbcStatusCode.IOError).SetSqlState("08001"); + activity?.AddException(exception, [ + new("error.context", "http.retry.timeout_exceeded"), + new("attempts", attemptCount), + new("total_retry_seconds", totalRetrySeconds), + new("timeout_seconds", _retryTimeoutSeconds) + ]); + throw exception; } /// From 43623972efbb0edea5731d87417b092e4ef5f13d Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Wed, 5 Nov 2025 05:59:44 +0530 Subject: [PATCH 6/6] Fix Activity propagation in RetryHttpHandler for client logging Address reviewer feedback by implementing proper Activity tracing infrastructure following the CloudFetchDownloader pattern. Changes: - RetryHttpHandler now implements IActivityTracer interface - Accepts IActivityTracer parameter to delegate to connection's trace - Wraps retry logic in TraceActivityAsync for proper Activity creation - DatabricksConnection passes 'this' to RetryHttpHandler constructor - Updated all unit tests with MockActivityTracer This ensures Activity-based logging works reliably in client scenarios like PowerBI Desktop by properly propagating trace context through the handler chain, rather than relying on Activity.Current which may be null. Co-Authored-By: Claude --- .../Databricks/DatabricksConnection.cs | 4 +- .../Drivers/Databricks/RetryHttpHandler.cs | 208 +++++++++--------- .../Databricks/Unit/RetryHttpHandlerTest.cs | 36 ++- 3 files changed, 140 insertions(+), 108 deletions(-) diff --git a/csharp/src/Drivers/Databricks/DatabricksConnection.cs b/csharp/src/Drivers/Databricks/DatabricksConnection.cs index d6480d214f..8e8a71ac0b 100644 --- a/csharp/src/Drivers/Databricks/DatabricksConnection.cs +++ b/csharp/src/Drivers/Databricks/DatabricksConnection.cs @@ -590,8 +590,8 @@ protected override HttpMessageHandler CreateHttpHandler() { // Add retry handler for 408, 502, 503, 504 responses with Retry-After support // This must be INSIDE ThriftErrorMessageHandler so retries happen before exceptions are thrown - baseHandler = new RetryHttpHandler(baseHandler, TemporarilyUnavailableRetryTimeout); - baseAuthHandler = new RetryHttpHandler(baseAuthHandler, TemporarilyUnavailableRetryTimeout); + baseHandler = new RetryHttpHandler(baseHandler, this, TemporarilyUnavailableRetryTimeout); + baseAuthHandler = new RetryHttpHandler(baseAuthHandler, this, TemporarilyUnavailableRetryTimeout); } // Add Thrift error message handler AFTER retry handler (OUTSIDE in the chain) diff --git a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs index a7aa0f8c21..3b37d31e93 100644 --- a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs +++ b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs @@ -22,6 +22,7 @@ using System.Threading; using System.Threading.Tasks; using System.IO; +using Apache.Arrow.Adbc.Tracing; namespace Apache.Arrow.Adbc.Drivers.Databricks { @@ -29,8 +30,9 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks /// HTTP handler that implements retry behavior for 408, 502, 503, and 504 responses. /// Uses Retry-After header if present, otherwise uses exponential backoff. /// - internal class RetryHttpHandler : DelegatingHandler + internal class RetryHttpHandler : DelegatingHandler, IActivityTracer { + private readonly IActivityTracer _activityTracer; private readonly int _retryTimeoutSeconds; private readonly int _initialBackoffSeconds = 1; private readonly int _maxBackoffSeconds = 32; @@ -39,13 +41,21 @@ internal class RetryHttpHandler : DelegatingHandler /// Initializes a new instance of the class. /// /// The inner handler to delegate to. + /// The activity tracer for logging and diagnostics. /// Maximum total time in seconds to retry before failing. - public RetryHttpHandler(HttpMessageHandler innerHandler, int retryTimeoutSeconds) + public RetryHttpHandler(HttpMessageHandler innerHandler, IActivityTracer activityTracer, int retryTimeoutSeconds) : base(innerHandler) { + _activityTracer = activityTracer; _retryTimeoutSeconds = retryTimeoutSeconds; } + // IActivityTracer implementation - delegates to the connection + ActivityTrace IActivityTracer.Trace => _activityTracer.Trace; + string? IActivityTracer.TraceParent => _activityTracer.TraceParent; + string IActivityTracer.AssemblyVersion => _activityTracer.AssemblyVersion; + string IActivityTracer.AssemblyName => _activityTracer.AssemblyName; + /// /// Sends an HTTP request to the inner handler with retry logic for retryable status codes. /// @@ -53,127 +63,129 @@ protected override async Task SendAsync( HttpRequestMessage request, CancellationToken cancellationToken) { - Activity? activity = Activity.Current; - - // Clone the request content if it's not null so we can reuse it for retries - var requestContentClone = request.Content != null - ? await CloneHttpContentAsync(request.Content) - : null; - - HttpResponseMessage response; - string? lastErrorMessage = null; - DateTime startTime = DateTime.UtcNow; - int attemptCount = 0; - int currentBackoffSeconds = _initialBackoffSeconds; - int totalRetrySeconds = 0; - - do + return await this.TraceActivityAsync(async activity => { - // Set the content for each attempt (if needed) - if (requestContentClone != null && request.Content == null) + // Clone the request content if it's not null so we can reuse it for retries + var requestContentClone = request.Content != null + ? await CloneHttpContentAsync(request.Content) + : null; + + HttpResponseMessage response; + string? lastErrorMessage = null; + DateTime startTime = DateTime.UtcNow; + int attemptCount = 0; + int currentBackoffSeconds = _initialBackoffSeconds; + int totalRetrySeconds = 0; + + do { - request.Content = await CloneHttpContentAsync(requestContentClone); - } + // Set the content for each attempt (if needed) + if (requestContentClone != null && request.Content == null) + { + request.Content = await CloneHttpContentAsync(requestContentClone); + } - response = await base.SendAsync(request, cancellationToken); + response = await base.SendAsync(request, cancellationToken); - // If it's not a retryable status code, return immediately - if (!IsRetryableStatusCode(response.StatusCode)) - { - // Only log retry summary if retries occurred - if (attemptCount > 0) + // If it's not a retryable status code, return immediately + if (!IsRetryableStatusCode(response.StatusCode)) { - activity?.SetTag("http.retry.total_attempts", attemptCount); - activity?.SetTag("http.response.status_code", (int)response.StatusCode); + // Only log retry summary if retries occurred + if (attemptCount > 0) + { + activity?.SetTag("http.retry.total_attempts", attemptCount); + activity?.SetTag("http.response.status_code", (int)response.StatusCode); + } + return response; } - return response; - } - attemptCount++; + attemptCount++; - activity?.SetTag("http.retry.attempt", attemptCount); - activity?.SetTag("http.response.status_code", (int)response.StatusCode); + activity?.SetTag("http.retry.attempt", attemptCount); + activity?.SetTag("http.response.status_code", (int)response.StatusCode); - // Check if we've exceeded the timeout - TimeSpan elapsedTime = DateTime.UtcNow - startTime; - if (_retryTimeoutSeconds > 0 && elapsedTime.TotalSeconds > _retryTimeoutSeconds) - { - // We've exceeded the timeout, so break out of the loop - break; - } + // Check if we've exceeded the timeout + TimeSpan elapsedTime = DateTime.UtcNow - startTime; + if (_retryTimeoutSeconds > 0 && elapsedTime.TotalSeconds > _retryTimeoutSeconds) + { + // We've exceeded the timeout, so break out of the loop + break; + } - int waitSeconds; + int waitSeconds; - // Check for Retry-After header - if (response.Headers.TryGetValues("Retry-After", out var retryAfterValues)) - { - // Parse the Retry-After value - string retryAfterValue = string.Join(",", retryAfterValues); - if (int.TryParse(retryAfterValue, out int retryAfterSeconds) && retryAfterSeconds > 0) + // Check for Retry-After header + if (response.Headers.TryGetValues("Retry-After", out var retryAfterValues)) { - // Use the Retry-After value - waitSeconds = retryAfterSeconds; - lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Using server-specified retry after {waitSeconds} seconds. Attempt {attemptCount}."; + // Parse the Retry-After value + string retryAfterValue = string.Join(",", retryAfterValues); + if (int.TryParse(retryAfterValue, out int retryAfterSeconds) && retryAfterSeconds > 0) + { + // Use the Retry-After value + waitSeconds = retryAfterSeconds; + lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Using server-specified retry after {waitSeconds} seconds. Attempt {attemptCount}."; + } + else + { + // Invalid Retry-After value, use exponential backoff + waitSeconds = CalculateBackoffWithJitter(currentBackoffSeconds); + lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Invalid Retry-After header, using exponential backoff of {waitSeconds} seconds. Attempt {attemptCount}."; + } } else { - // Invalid Retry-After value, use exponential backoff + // No Retry-After header, use exponential backoff waitSeconds = CalculateBackoffWithJitter(currentBackoffSeconds); - lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Invalid Retry-After header, using exponential backoff of {waitSeconds} seconds. Attempt {attemptCount}."; + lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Using exponential backoff of {waitSeconds} seconds. Attempt {attemptCount}."; } - } - else - { - // No Retry-After header, use exponential backoff - waitSeconds = CalculateBackoffWithJitter(currentBackoffSeconds); - lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Using exponential backoff of {waitSeconds} seconds. Attempt {attemptCount}."; - } - // Dispose the response before retrying - response.Dispose(); + // Dispose the response before retrying + response.Dispose(); - // Reset the request content for the next attempt - request.Content = null; + // Reset the request content for the next attempt + request.Content = null; - // Update total retry time - totalRetrySeconds += waitSeconds; - if (_retryTimeoutSeconds > 0 && totalRetrySeconds > _retryTimeoutSeconds) - { - // We've exceeded the timeout, so break out of the loop - break; - } + // Update total retry time + totalRetrySeconds += waitSeconds; + if (_retryTimeoutSeconds > 0 && totalRetrySeconds > _retryTimeoutSeconds) + { + // We've exceeded the timeout, so break out of the loop + break; + } - // Wait for the calculated time - await Task.Delay(TimeSpan.FromSeconds(waitSeconds), cancellationToken); + // Wait for the calculated time + await Task.Delay(TimeSpan.FromSeconds(waitSeconds), cancellationToken); - // Increase backoff for next attempt (exponential backoff) - currentBackoffSeconds = Math.Min(currentBackoffSeconds * 2, _maxBackoffSeconds); - } while (!cancellationToken.IsCancellationRequested); + // Increase backoff for next attempt (exponential backoff) + currentBackoffSeconds = Math.Min(currentBackoffSeconds * 2, _maxBackoffSeconds); + } while (!cancellationToken.IsCancellationRequested); - activity?.SetTag("http.retry.total_attempts", attemptCount); - activity?.SetTag("http.response.status_code", (int)response.StatusCode); + activity?.SetTag("http.retry.total_attempts", attemptCount); + activity?.SetTag("http.response.status_code", (int)response.StatusCode); - // If we get here, we've either exceeded the timeout or been cancelled - if (cancellationToken.IsCancellationRequested) - { - activity?.SetTag("http.retry.outcome", "cancelled"); - var cancelEx = new OperationCanceledException("Request cancelled during retry wait", cancellationToken); - activity?.AddException(cancelEx, [ - new("error.context", "http.retry.cancelled"), - new("attempts", attemptCount) - ]); - } + // If we get here, we've either exceeded the timeout or been cancelled + if (cancellationToken.IsCancellationRequested) + { + activity?.SetTag("http.retry.outcome", "cancelled"); + var cancelEx = new OperationCanceledException("Request cancelled during retry wait", cancellationToken); + activity?.AddException(cancelEx, [ + new("error.context", "http.retry.cancelled"), + new("attempts", attemptCount) + ]); + throw cancelEx; + } - // Timeout exceeded - activity?.SetTag("http.retry.outcome", "timeout_exceeded"); - var exception = new DatabricksException(lastErrorMessage ?? "Service temporarily unavailable and retry timeout exceeded", AdbcStatusCode.IOError).SetSqlState("08001"); - activity?.AddException(exception, [ - new("error.context", "http.retry.timeout_exceeded"), - new("attempts", attemptCount), - new("total_retry_seconds", totalRetrySeconds), - new("timeout_seconds", _retryTimeoutSeconds) - ]); - throw exception; + // Timeout exceeded + activity?.SetTag("http.retry.outcome", "timeout_exceeded"); + var exception = new DatabricksException(lastErrorMessage ?? "Service temporarily unavailable and retry timeout exceeded", AdbcStatusCode.IOError).SetSqlState("08001"); + activity?.AddException(exception, [ + new("error.context", "http.retry.timeout_exceeded"), + new("attempts", attemptCount), + new("total_retry_seconds", totalRetrySeconds), + new("timeout_seconds", _retryTimeoutSeconds) + ]); + throw exception; + }, activityName: "RetryHttp"); } /// diff --git a/csharp/test/Drivers/Databricks/Unit/RetryHttpHandlerTest.cs b/csharp/test/Drivers/Databricks/Unit/RetryHttpHandlerTest.cs index 3a09fe8775..e7a9a5c471 100644 --- a/csharp/test/Drivers/Databricks/Unit/RetryHttpHandlerTest.cs +++ b/csharp/test/Drivers/Databricks/Unit/RetryHttpHandlerTest.cs @@ -20,6 +20,7 @@ using System.Threading; using System.Threading.Tasks; using Apache.Arrow.Adbc.Drivers.Databricks; +using Apache.Arrow.Adbc.Tracing; using Xunit; namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks.Unit @@ -34,6 +35,17 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks.Unit /// public class RetryHttpHandlerTest { + /// + /// Mock activity tracer for testing. + /// + private class MockActivityTracer : IActivityTracer + { + public string? TraceParent { get; set; } + public ActivityTrace Trace => new ActivityTrace("TestSource", "1.0.0", TraceParent); + public string AssemblyVersion => "1.0.0"; + public string AssemblyName => "TestAssembly"; + } + /// /// Tests that the RetryHttpHandler properly processes 503 responses with Retry-After headers. /// @@ -49,7 +61,8 @@ public async Task RetryAfterHandlerProcesses503Response() }); // Create the RetryHttpHandler with retry enabled and a 5-second timeout - var retryHandler = new RetryHttpHandler(mockHandler, 5); + var mockTracer = new MockActivityTracer(); + var retryHandler = new RetryHttpHandler(mockHandler, mockTracer, 5); // Create an HttpClient with our handler var httpClient = new HttpClient(retryHandler); @@ -84,7 +97,8 @@ public async Task RetryAfterHandlerThrowsWhenTimeoutExceeded() }); // Create the RetryHttpHandler with retry enabled and a 1-second timeout - var retryHandler = new RetryHttpHandler(mockHandler, 1); + var mockTracer = new MockActivityTracer(); + var retryHandler = new RetryHttpHandler(mockHandler, mockTracer, 1); // Create an HttpClient with our handler var httpClient = new HttpClient(retryHandler); @@ -115,7 +129,8 @@ public async Task RetryAfterHandlerHandlesNonRetryableResponse() }); // Create the RetryHttpHandler with retry enabled - var retryHandler = new RetryHttpHandler(mockHandler, 5); + var mockTracer = new MockActivityTracer(); + var retryHandler = new RetryHttpHandler(mockHandler, mockTracer, 5); // Create an HttpClient with our handler var httpClient = new HttpClient(retryHandler); @@ -143,7 +158,8 @@ public async Task RetryHandlerUsesExponentialBackoffFor503WithoutRetryAfterHeade }); // Create the RetryHttpHandler with retry enabled - var retryHandler = new RetryHttpHandler(mockHandler, 5); + var mockTracer = new MockActivityTracer(); + var retryHandler = new RetryHttpHandler(mockHandler, mockTracer, 5); // Create an HttpClient with our handler var httpClient = new HttpClient(retryHandler); @@ -191,7 +207,8 @@ public async Task RetryHandlerUsesExponentialBackoffForInvalidRetryAfterHeader() }); // Create the RetryHttpHandler with retry enabled - var retryHandler = new RetryHttpHandler(mockHandler, 5); + var mockTracer = new MockActivityTracer(); + var retryHandler = new RetryHttpHandler(mockHandler, mockTracer, 5); // Create an HttpClient with our handler var httpClient = new HttpClient(retryHandler); @@ -223,7 +240,8 @@ public async Task RetryHandlerProcessesRetryableStatusCodes(HttpStatusCode statu }); // Create the RetryHttpHandler with retry enabled - var retryHandler = new RetryHttpHandler(mockHandler, 5); + var mockTracer = new MockActivityTracer(); + var retryHandler = new RetryHttpHandler(mockHandler, mockTracer, 5); // Create an HttpClient with our handler var httpClient = new HttpClient(retryHandler); @@ -257,7 +275,8 @@ public async Task RetryHandlerHandlesMultipleRetriesWithExponentialBackoff() }); // Create the RetryHttpHandler with retry enabled and a generous timeout - var retryHandler = new RetryHttpHandler(mockHandler, 10); + var mockTracer = new MockActivityTracer(); + var retryHandler = new RetryHttpHandler(mockHandler, mockTracer, 10); // Create an HttpClient with our handler var httpClient = new HttpClient(retryHandler); @@ -296,7 +315,8 @@ public async Task RetryHandlerThrowsWhenServerNeverRecovers(HttpStatusCode statu }); // Create the RetryHttpHandler with a short timeout to make the test run faster - var retryHandler = new RetryHttpHandler(mockHandler, 3); + var mockTracer = new MockActivityTracer(); + var retryHandler = new RetryHttpHandler(mockHandler, mockTracer, 3); // Create an HttpClient with our handler var httpClient = new HttpClient(retryHandler);