From 3166677541e3c55871a2841faf3cb6024f333174 Mon Sep 17 00:00:00 2001 From: Dmitry Rybakov Date: Fri, 6 Mar 2026 10:44:04 +0100 Subject: [PATCH 01/18] RUBY-3724 Fix backpressure tests --- .evergreen/config.yml | 4 +- .evergreen/config/standard.yml.erb | 4 +- spec/integration/sdam_prose_spec.rb | 71 ++++++++++ spec/integration/secondary_reads_spec.rb | 4 +- spec/mongo/retryable/token_bucket_spec.rb | 11 +- .../data/sdam_unified/minPoolSize-error.yml | 5 +- .../pool-clear-min-pool-size-error.yml | 132 ++++++++++++++++++ 7 files changed, 222 insertions(+), 9 deletions(-) create mode 100644 spec/spec_tests/data/sdam_unified/pool-clear-min-pool-size-error.yml diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 2d321d0b38..32591a083e 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -1273,7 +1273,7 @@ buildvariants: - matrix_name: CSOT matrix_spec: ruby: "ruby-4.0" - mongodb-version: "8.0" + mongodb-version: ["8.0", "rapid"] topology: replica-set-single-node os: ubuntu2204 display_name: "CSOT - ${mongodb-version}" @@ -1532,7 +1532,7 @@ buildvariants: auth-and-ssl: "noauth-and-nossl" ruby: ["ruby-4.0", "ruby-3.4", "ruby-3.3", "ruby-3.2", "ruby-3.1"] topology: [replica-set, sharded-cluster] - mongodb-version: [ '6.0', '7.0', '8.0' ] + mongodb-version: [ '6.0', '7.0', '8.0', 'rapid' ] os: ubuntu2204 fle: helper display_name: "FLE: ${mongodb-version} ${topology} ${ruby}" diff --git a/.evergreen/config/standard.yml.erb b/.evergreen/config/standard.yml.erb index 2dbc222dd0..c960841a61 100644 --- a/.evergreen/config/standard.yml.erb +++ b/.evergreen/config/standard.yml.erb @@ -151,7 +151,7 @@ buildvariants: - matrix_name: CSOT matrix_spec: ruby: <%= latest_ruby %> - mongodb-version: <%= latest_stable_mdb %> + mongodb-version: <%= stable_and_rapid %> topology: replica-set-single-node os: ubuntu2204 display_name: "CSOT - ${mongodb-version}" @@ -350,7 +350,7 @@ buildvariants: auth-and-ssl: "noauth-and-nossl" ruby: <%= supported_mri_rubies_3_ubuntu %> topology: [replica-set, sharded-cluster] - mongodb-version: [ '6.0', '7.0', '8.0' ] + mongodb-version: [ '6.0', '7.0', '8.0', 'rapid' ] os: ubuntu2204 fle: helper display_name: "FLE: ${mongodb-version} ${topology} ${ruby}" diff --git a/spec/integration/sdam_prose_spec.rb b/spec/integration/sdam_prose_spec.rb index 9cf0c33ac6..846e9ddf6f 100644 --- a/spec/integration/sdam_prose_spec.rb +++ b/spec/integration/sdam_prose_spec.rb @@ -64,4 +64,75 @@ configureFailPoint: 'failCommand', mode: 'off') end end + + describe 'Connection Pool Backpressure' do + min_server_fcv '8.2' + require_topology :single + + let(:subscriber) { Mrss::EventSubscriber.new } + + let(:client) do + new_local_client( + SpecConfig.instance.addresses, + SpecConfig.instance.all_test_options.merge( + max_connecting: 100, + max_pool_size: 100, + ), + ).tap do |client| + client.subscribe(Mongo::Monitoring::CONNECTION_POOL, subscriber) + end + end + + after do + sleep 1 + root_authorized_client.use('admin').database.command( + setParameter: 1, + ingressConnectionEstablishmentRateLimiterEnabled: false, + ) + end + + it 'generates checkout failures when the ingress connection rate limiter is active' do + # Enable the ingress rate limiter. + root_authorized_client.use('admin').database.command( + setParameter: 1, + ingressConnectionEstablishmentRateLimiterEnabled: true, + ) + root_authorized_client.use('admin').database.command( + setParameter: 1, + ingressConnectionEstablishmentRatePerSec: 20, + ) + root_authorized_client.use('admin').database.command( + setParameter: 1, + ingressConnectionEstablishmentBurstCapacitySecs: 1, + ) + root_authorized_client.use('admin').database.command( + setParameter: 1, + ingressConnectionEstablishmentMaxQueueDepth: 1, + ) + + # Add a document so $where has something to process. + client.use('test')['test'].delete_many + client.use('test')['test'].insert_one({}) + + # Run 100 parallel find_one operations that contend for connections. + threads = 100.times.map do + Thread.new do + begin + client.use('test')['test'].find( + '$where' => 'function() { sleep(2000); return true; }' + ).first + rescue StandardError + # Ignore connection errors (including checkout timeouts). + end + end + end + threads.each(&:join) + + checkout_failed = subscriber.select_published_events( + Mongo::Monitoring::Event::Cmap::ConnectionCheckOutFailed + ) + + expect(checkout_failed.length).to be >= 10 + end + end end diff --git a/spec/integration/secondary_reads_spec.rb b/spec/integration/secondary_reads_spec.rb index 53813081ad..12d8bdc235 100644 --- a/spec/integration/secondary_reads_spec.rb +++ b/spec/integration/secondary_reads_spec.rb @@ -28,7 +28,7 @@ end_stats = get_read_counters - end_stats[:secondary].should be_within(10).of(start_stats[:secondary]) + end_stats[:secondary].should be_within(50).of(start_stats[:secondary]) end_stats[:primary].should >= start_stats[:primary] + 30 end end @@ -50,7 +50,7 @@ end_stats = get_read_counters - end_stats[:primary].should be_within(10).of(start_stats[:primary]) + end_stats[:primary].should be_within(50).of(start_stats[:primary]) end_stats[:secondary].should >= start_stats[:secondary] + 30 end end diff --git a/spec/mongo/retryable/token_bucket_spec.rb b/spec/mongo/retryable/token_bucket_spec.rb index 175008e258..7ddf431b7d 100644 --- a/spec/mongo/retryable/token_bucket_spec.rb +++ b/spec/mongo/retryable/token_bucket_spec.rb @@ -67,7 +67,16 @@ end describe 'thread safety' do - let(:bucket) { described_class.new(capacity: 1000) } + # Use capacity 2000, start at 1000 tokens. + # With 500 consumes and 500 deposits, floor/ceiling cannot be hit: + # min possible = 1000 - 500 = 500 > 0 (all consumes succeed) + # max possible = 1000 + 500 = 1500 < 2000 (all deposits effective) + # So the net change is guaranteed to be 0, making the assertion reliable. + let(:bucket) do + b = described_class.new(capacity: 2000) + b.consume(1000) + b + end def run_concurrent_operations(bucket) threads = [] diff --git a/spec/spec_tests/data/sdam_unified/minPoolSize-error.yml b/spec/spec_tests/data/sdam_unified/minPoolSize-error.yml index 110e647c62..1bbc0c376e 100644 --- a/spec/spec_tests/data/sdam_unified/minPoolSize-error.yml +++ b/spec/spec_tests/data/sdam_unified/minPoolSize-error.yml @@ -21,7 +21,7 @@ initialData: &initialData documents: [] tests: - - description: Network error on minPoolSize background creation + - description: Server error on minPoolSize background creation operations: # Configure the initial monitor handshake to succeed but the # first or second background minPoolSize establishments to fail. @@ -38,7 +38,7 @@ tests: - hello - isMaster appName: SDAMminPoolSizeError - closeConnection: true + errorCode: 91 - name: createEntities object: testRunner arguments: @@ -54,6 +54,7 @@ tests: heartbeatFrequencyMS: 10000 appname: SDAMminPoolSizeError minPoolSize: 10 + serverMonitoringMode: poll serverSelectionTimeoutMS: 1000 - database: id: &database database diff --git a/spec/spec_tests/data/sdam_unified/pool-clear-min-pool-size-error.yml b/spec/spec_tests/data/sdam_unified/pool-clear-min-pool-size-error.yml new file mode 100644 index 0000000000..2c8e32a410 --- /dev/null +++ b/spec/spec_tests/data/sdam_unified/pool-clear-min-pool-size-error.yml @@ -0,0 +1,132 @@ +--- +description: pool-cleared-on-min-pool-size-population-error + +schemaVersion: "1.4" + +runOnRequirements: + # failCommand appName requirements + - minServerVersion: "4.4" + serverless: forbid + topologies: [ single ] + +createEntities: + - client: + id: &setupClient setupClient + useMultipleMongoses: false + +tests: + - description: Pool is cleared on authentication error during minPoolSize population + runOnRequirements: + # failCommand appName requirements + - auth: true + operations: + - name: failPoint + object: testRunner + arguments: + client: *setupClient + failPoint: + configureFailPoint: failCommand + mode: + times: 1 + data: + failCommands: + - saslContinue + appName: authErrorTest + errorCode: 18 + - name: createEntities + object: testRunner + arguments: + entities: + - client: + id: &client client + observeEvents: + - poolReadyEvent + - poolClearedEvent + - connectionClosedEvent + uriOptions: + appname: authErrorTest + minPoolSize: 1 + + - name: waitForEvent + object: testRunner + arguments: + client: *client + event: + poolReadyEvent: {} + count: 1 + + - name: waitForEvent + object: testRunner + arguments: + client: *client + event: + poolClearedEvent: {} + count: 1 + + - name: waitForEvent + object: testRunner + arguments: + client: *client + event: + connectionClosedEvent: {} + count: 1 + + - description: Pool is not cleared on handshake error during minPoolSize population + operations: + - name: failPoint + object: testRunner + arguments: + client: *setupClient + failPoint: + configureFailPoint: failCommand + mode: + skip: 1 # skip one to let monitoring thread to move pool to ready state + data: + failCommands: + - hello + - isMaster + appName: authErrorTest + closeConnection: true + + - name: createEntities + object: testRunner + arguments: + entities: + - client: + id: &client client + observeEvents: + - poolReadyEvent + - poolClearedEvent + - connectionClosedEvent + uriOptions: + appname: authErrorTest + minPoolSize: 5 + maxConnecting: 1 + # ensure that once we've connected to the server, the failCommand won't + # be triggered by monitors and will only be triggered by handshakes + serverMonitoringMode: poll + heartbeatFrequencyMS: 1000000 + + - name: waitForEvent + object: testRunner + arguments: + client: *client + event: + poolReadyEvent: {} + count: 1 + + - name: waitForEvent + object: testRunner + arguments: + client: *client + event: + connectionClosedEvent: {} + count: 1 + + - name: assertEventCount + object: testRunner + arguments: + client: *client + event: + poolClearedEvent: {} + count: 0 From 8bb216d5f01175c64932a798f65b4f3b4199cf18 Mon Sep 17 00:00:00 2001 From: Dmitry Rybakov <160598371+comandeo-mongo@users.noreply.github.com> Date: Thu, 26 Mar 2026 15:52:45 +0100 Subject: [PATCH 02/18] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- spec/integration/sdam_prose_spec.rb | 67 ++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 10 deletions(-) diff --git a/spec/integration/sdam_prose_spec.rb b/spec/integration/sdam_prose_spec.rb index 846e9ddf6f..2df63cb4e0 100644 --- a/spec/integration/sdam_prose_spec.rb +++ b/spec/integration/sdam_prose_spec.rb @@ -85,27 +85,72 @@ after do sleep 1 - root_authorized_client.use('admin').database.command( - setParameter: 1, - ingressConnectionEstablishmentRateLimiterEnabled: false, - ) + admin_db = root_authorized_client.use('admin').database + + if defined?(@prev_ingressConnectionEstablishmentRateLimiterEnabled) && + defined?(@prev_ingressConnectionEstablishmentRatePerSec) && + defined?(@prev_ingressConnectionEstablishmentBurstCapacitySecs) && + defined?(@prev_ingressConnectionEstablishmentMaxQueueDepth) + admin_db.command( + setParameter: 1, + ingressConnectionEstablishmentRateLimiterEnabled: @prev_ingressConnectionEstablishmentRateLimiterEnabled, + ) + admin_db.command( + setParameter: 1, + ingressConnectionEstablishmentRatePerSec: @prev_ingressConnectionEstablishmentRatePerSec, + ) + admin_db.command( + setParameter: 1, + ingressConnectionEstablishmentBurstCapacitySecs: @prev_ingressConnectionEstablishmentBurstCapacitySecs, + ) + admin_db.command( + setParameter: 1, + ingressConnectionEstablishmentMaxQueueDepth: @prev_ingressConnectionEstablishmentMaxQueueDepth, + ) + else + # Fallback: at least disable the limiter if previous values were not captured. + admin_db.command( + setParameter: 1, + ingressConnectionEstablishmentRateLimiterEnabled: false, + ) + end end it 'generates checkout failures when the ingress connection rate limiter is active' do - # Enable the ingress rate limiter. - root_authorized_client.use('admin').database.command( + admin_db = root_authorized_client.use('admin').database + + # Capture current ingress connection establishment parameters so they can be restored. + current_params = admin_db.command( + getParameter: 1, + ingressConnectionEstablishmentRateLimiterEnabled: 1, + ingressConnectionEstablishmentRatePerSec: 1, + ingressConnectionEstablishmentBurstCapacitySecs: 1, + ingressConnectionEstablishmentMaxQueueDepth: 1, + ).first + + @prev_ingressConnectionEstablishmentRateLimiterEnabled = + current_params['ingressConnectionEstablishmentRateLimiterEnabled'] + @prev_ingressConnectionEstablishmentRatePerSec = + current_params['ingressConnectionEstablishmentRatePerSec'] + @prev_ingressConnectionEstablishmentBurstCapacitySecs = + current_params['ingressConnectionEstablishmentBurstCapacitySecs'] + @prev_ingressConnectionEstablishmentMaxQueueDepth = + current_params['ingressConnectionEstablishmentMaxQueueDepth'] + + # Enable the ingress rate limiter with test-specific values. + admin_db.command( setParameter: 1, ingressConnectionEstablishmentRateLimiterEnabled: true, ) - root_authorized_client.use('admin').database.command( + admin_db.command( setParameter: 1, ingressConnectionEstablishmentRatePerSec: 20, ) - root_authorized_client.use('admin').database.command( + admin_db.command( setParameter: 1, ingressConnectionEstablishmentBurstCapacitySecs: 1, ) - root_authorized_client.use('admin').database.command( + admin_db.command( setParameter: 1, ingressConnectionEstablishmentMaxQueueDepth: 1, ) @@ -121,7 +166,9 @@ client.use('test')['test'].find( '$where' => 'function() { sleep(2000); return true; }' ).first - rescue StandardError + rescue Mongo::Error::PoolTimeout, + Mongo::Error::SocketError, + Mongo::Error::NoServerAvailable # Ignore connection errors (including checkout timeouts). end end From 680e19e14d60542723c3ffae618bd0e274384ede Mon Sep 17 00:00:00 2001 From: Dmitry Rybakov Date: Thu, 26 Mar 2026 16:36:04 +0100 Subject: [PATCH 03/18] Fix rescue clause in backpressure prose test Mongo::Error::PoolTimeout does not exist; replace with StandardError which covers all connection-related errors (SocketError, pool checkout timeouts, NoServerAvailable, etc.). --- spec/integration/sdam_prose_spec.rb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/spec/integration/sdam_prose_spec.rb b/spec/integration/sdam_prose_spec.rb index 2df63cb4e0..cb944a3a0a 100644 --- a/spec/integration/sdam_prose_spec.rb +++ b/spec/integration/sdam_prose_spec.rb @@ -166,9 +166,7 @@ client.use('test')['test'].find( '$where' => 'function() { sleep(2000); return true; }' ).first - rescue Mongo::Error::PoolTimeout, - Mongo::Error::SocketError, - Mongo::Error::NoServerAvailable + rescue StandardError # Ignore connection errors (including checkout timeouts). end end From f29174b5b1313a330f4bf2de75c8afecdf83931d Mon Sep 17 00:00:00 2001 From: Dmitry Rybakov Date: Fri, 27 Mar 2026 10:11:20 +0100 Subject: [PATCH 04/18] RUBY-3770 Implement makeTimeoutError semantics in withTransaction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per DRIVERS-3391 / transactions-convenient-api spec, withTransaction must propagate a TimeoutError (wrapping the last transient error as .cause) when the CSOT deadline is exhausted, instead of re-raising the raw error. Changes to lib/mongo/session.rb: - Callback raises TransientTransactionError + deadline expired: raise TimeoutError in CSOT mode, re-raise original in non-CSOT mode. - Commit raises UnknownTransactionCommitResult + deadline expired: same. - Commit raises TransientTransactionError + deadline expired: same. - Backoff (regular and overload) would exceed deadline: use new make_timeout_error_from helper — CSOT wraps last_error with cause, non-CSOT raises last_error directly (fixes incorrect TimeoutError that was raised in non-CSOT mode before this change). - Commit overload backoff would exceed deadline: raise TimeoutError in CSOT mode, re-raise in non-CSOT mode. - Add make_timeout_error_from private helper implementing the makeTimeoutError(lastError) pseudocode from the spec. Uses an inner begin/rescue to wire .cause when called outside a rescue block. New spec/mongo/session/with_transaction_timeout_spec.rb covers all eight code paths as unit tests with stubbed time control (no real server needed). --- lib/mongo/session.rb | 48 ++- .../session/with_transaction_timeout_spec.rb | 368 ++++++++++++++++++ 2 files changed, 410 insertions(+), 6 deletions(-) create mode 100644 spec/mongo/session/with_transaction_timeout_spec.rb diff --git a/lib/mongo/session.rb b/lib/mongo/session.rb index 629af3e800..86f7665a9f 100644 --- a/lib/mongo/session.rb +++ b/lib/mongo/session.rb @@ -475,7 +475,7 @@ def with_transaction(options = nil) if overload_encountered delay = @client.retry_policy.backoff_delay(overload_error_count) if backoff_would_exceed_deadline?(deadline, delay) - raise Mongo::Error::TimeoutError, 'CSOT timeout expired waiting to retry withTransaction' + make_timeout_error_from(last_error) end unless @client.retry_policy.should_retry_overload?(overload_error_count, delay) raise(last_error) @@ -484,7 +484,7 @@ def with_transaction(options = nil) else backoff = backoff_seconds_for_retry(transaction_attempt) if backoff_would_exceed_deadline?(deadline, backoff) - raise Mongo::Error::TimeoutError, 'CSOT timeout expired waiting to retry withTransaction' + make_timeout_error_from(last_error) end sleep(backoff) end @@ -513,7 +513,11 @@ def with_transaction(options = nil) if deadline_expired?(deadline) transaction_in_progress = false - raise + if @with_transaction_timeout_ms + raise Mongo::Error::TimeoutError, 'CSOT timeout expired during withTransaction callback' + else + raise + end end if e.is_a?(Mongo::Error) && e.label?('TransientTransactionError') @@ -554,7 +558,11 @@ def with_transaction(options = nil) e.is_a?(Error::OperationFailure::Family) && e.max_time_ms_expired? then transaction_in_progress = false - raise + if @with_transaction_timeout_ms && deadline_expired?(deadline) + raise Mongo::Error::TimeoutError, 'CSOT timeout expired during withTransaction commit' + else + raise + end end if e.label?('SystemOverloadedError') @@ -569,7 +577,11 @@ def with_transaction(options = nil) delay = @client.retry_policy.backoff_delay(overload_error_count) if backoff_would_exceed_deadline?(deadline, delay) transaction_in_progress = false - raise + if @with_transaction_timeout_ms + raise Mongo::Error::TimeoutError, 'CSOT timeout expired during withTransaction commit' + else + raise + end end unless @client.retry_policy.should_retry_overload?(overload_error_count, delay) transaction_in_progress = false @@ -591,7 +603,11 @@ def with_transaction(options = nil) elsif e.label?('TransientTransactionError') if Utils.monotonic_time >= deadline transaction_in_progress = false - raise + if @with_transaction_timeout_ms + raise Mongo::Error::TimeoutError, 'CSOT timeout expired during withTransaction commit' + else + raise + end end last_error = e if e.label?('SystemOverloadedError') @@ -1436,5 +1452,25 @@ def backoff_would_exceed_deadline?(deadline, backoff_seconds) Utils.monotonic_time + backoff_seconds >= deadline end + + # Implements makeTimeoutError(lastError) from the transactions-convenient-api spec. + # Called when the withTransaction retry loop cannot continue because backoff would + # exceed the deadline. + # + # - CSOT mode: raises TimeoutError with lastError as Ruby's .cause + # - non-CSOT mode: re-raises lastError directly + # + # Must be called from outside a rescue block; raises unconditionally. + def make_timeout_error_from(last_error) + if @with_transaction_timeout_ms + begin + raise last_error + rescue + raise Mongo::Error::TimeoutError, 'CSOT timeout expired waiting to retry withTransaction' + end + else + raise last_error + end + end end end diff --git a/spec/mongo/session/with_transaction_timeout_spec.rb b/spec/mongo/session/with_transaction_timeout_spec.rb new file mode 100644 index 0000000000..3757980da7 --- /dev/null +++ b/spec/mongo/session/with_transaction_timeout_spec.rb @@ -0,0 +1,368 @@ +# frozen_string_literal: true + +require 'spec_helper' + +# Prose tests for the "Retry Timeout is Enforced" section of the +# transactions-convenient-api spec README. +# +# specifications/source/transactions-convenient-api/tests/README.md +# +# Three sub-cases must be covered: +# 1. Callback raises TransientTransactionError and timeout is exceeded. +# 2. Commit raises UnknownTransactionCommitResult and timeout is exceeded. +# 3. Commit raises TransientTransactionError and timeout is exceeded. +# +# Note 1 from spec: "The error SHOULD be propagated as a timeout error if +# the language allows to expose the underlying error as a cause of a timeout +# error." Ruby supports this via Exception#cause. +describe 'Mongo::Session#with_transaction Retry Timeout is Enforced' do + let(:retry_policy) do + Mongo::Retryable::RetryPolicy.new(adaptive_retries: false) + end + + let(:client) do + instance_double(Mongo::Client).tap do |c| + allow(c).to receive(:retry_policy).and_return(retry_policy) + allow(c).to receive(:timeout_ms).and_return(nil) + end + end + + let(:session) do + sess = Mongo::Session.allocate + sess.instance_variable_set(:@client, client) + sess.instance_variable_set(:@options, {}) + sess.instance_variable_set(:@state, Mongo::Session::NO_TRANSACTION_STATE) + sess.instance_variable_set(:@lock, Mutex.new) + allow(sess).to receive(:check_transactions_supported!).and_return(true) + allow(sess).to receive(:check_if_ended!) + allow(sess).to receive(:log_warn) + allow(sess).to receive(:session_id).and_return(BSON::Document.new('id' => 'test')) + sess + end + + before do + allow(session).to receive(:start_transaction) do |*_args| + session.instance_variable_set(:@state, Mongo::Session::STARTING_TRANSACTION_STATE) + end + + allow(session).to receive(:abort_transaction) do + session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_ABORTED_STATE) + end + + allow(session).to receive(:commit_transaction) do + session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_COMMITTED_STATE) + end + + allow(session).to receive(:sleep) + end + + # Stubs Mongo::Utils.monotonic_time to return a fixed "present" time for the + # first `initial_calls` invocations, then a time far in the future for all + # remaining calls. Combined with timeout_ms: 1 (deadline ≈ present + 0.001 s) + # this makes every deadline check after the Nth call return "expired". + def with_expired_deadline_after(initial_calls:, &block) + call_count = 0 + allow(Mongo::Utils).to receive(:monotonic_time) do + call_count += 1 + call_count <= initial_calls ? 100.0 : 200.0 + end + block.call + end + + def make_transient_error + error = Mongo::Error::OperationFailure.new('transient') + error.add_label('TransientTransactionError') + error + end + + def make_commit_unknown_error + error = Mongo::Error::OperationFailure.new('commit unknown') + error.add_label('UnknownTransactionCommitResult') + error + end + + def make_commit_transient_error + error = Mongo::Error::OperationFailure.new('commit transient') + error.add_label('TransientTransactionError') + error + end + + def make_transient_overload_error + error = Mongo::Error::OperationFailure.new('transient overload') + error.add_label('TransientTransactionError') + error.add_label('SystemOverloadedError') + error + end + + def make_commit_overload_error + error = Mongo::Error::OperationFailure.new('commit overload') + error.add_label('UnknownTransactionCommitResult') + error.add_label('SystemOverloadedError') + error + end + + # Sub-case 1: callback raises TransientTransactionError + timeout exceeded + context 'when callback raises TransientTransactionError and retry timeout is exceeded' do + it 'propagates the error as TimeoutError with the transient error as cause' do + transient_error = make_transient_error + call_count = 0 + + # Call 1 → deadline setup (returns 100.0, deadline = 100.001). + # Calls 2+ → 200.0, so deadline_expired? is true for subsequent checks. + with_expired_deadline_after(initial_calls: 1) do + expect do + session.with_transaction(timeout_ms: 1) do + call_count += 1 + raise transient_error + end + end.to raise_error(Mongo::Error::TimeoutError) do |err| + expect(err.cause).to eq(transient_error) + end + end + + expect(call_count).to eq(1) + end + end + + # Sub-case 2: commit raises UnknownTransactionCommitResult + timeout exceeded + context 'when commit raises UnknownTransactionCommitResult and retry timeout is exceeded' do + it 'propagates the error as TimeoutError with the commit error as cause' do + commit_error = make_commit_unknown_error + + allow(session).to receive(:commit_transaction) do + raise commit_error + end + + # Call 1 → deadline setup (100.0, deadline = 100.001). + # Call 2 → pre-commit CSOT check at line 540 (100.0, not expired → skip). + # Calls 3+ → 200.0, expired → deadline_expired? true inside commit rescue. + with_expired_deadline_after(initial_calls: 2) do + expect do + session.with_transaction(timeout_ms: 1) do + session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_IN_PROGRESS_STATE) + end + end.to raise_error(Mongo::Error::TimeoutError) do |err| + expect(err.cause).to eq(commit_error) + end + end + end + end + + # Sub-case 3: commit raises TransientTransactionError + timeout exceeded + context 'when commit raises TransientTransactionError and retry timeout is exceeded' do + it 'propagates the error as TimeoutError with the commit error as cause' do + commit_error = make_commit_transient_error + + allow(session).to receive(:commit_transaction) do + raise commit_error + end + + # Same time-control logic as sub-case 2. + with_expired_deadline_after(initial_calls: 2) do + expect do + session.with_transaction(timeout_ms: 1) do + session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_IN_PROGRESS_STATE) + end + end.to raise_error(Mongo::Error::TimeoutError) do |err| + expect(err.cause).to eq(commit_error) + end + end + end + end +end + +# Tests for the "backoff would exceed deadline" check that fires at the top of +# the retry loop (before sleeping). Two bugs exist here: +# 1. In CSOT mode the raised TimeoutError has no .cause (last_error is lost). +# 2. In non-CSOT mode a TimeoutError is raised instead of last_error. +# The same two bugs apply to the commit-overload backoff path. +describe 'Mongo::Session#with_transaction Backoff Deadline is Enforced' do + let(:retry_policy) do + Mongo::Retryable::RetryPolicy.new(adaptive_retries: false) + end + + let(:client) do + instance_double(Mongo::Client).tap do |c| + allow(c).to receive(:retry_policy).and_return(retry_policy) + allow(c).to receive(:timeout_ms).and_return(nil) + end + end + + let(:session) do + sess = Mongo::Session.allocate + sess.instance_variable_set(:@client, client) + sess.instance_variable_set(:@options, {}) + sess.instance_variable_set(:@state, Mongo::Session::NO_TRANSACTION_STATE) + sess.instance_variable_set(:@lock, Mutex.new) + allow(sess).to receive(:check_transactions_supported!).and_return(true) + allow(sess).to receive(:check_if_ended!) + allow(sess).to receive(:log_warn) + allow(sess).to receive(:session_id).and_return(BSON::Document.new('id' => 'test')) + sess + end + + before do + allow(session).to receive(:start_transaction) do |*_args| + session.instance_variable_set(:@state, Mongo::Session::STARTING_TRANSACTION_STATE) + end + + allow(session).to receive(:abort_transaction) do + session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_ABORTED_STATE) + end + + allow(session).to receive(:commit_transaction) do + session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_COMMITTED_STATE) + end + + allow(session).to receive(:sleep) + + # Fix jitter at 1.0 so backoff values are deterministic. + allow(Random).to receive(:rand).and_return(1.0) + allow(retry_policy).to receive(:backoff_delay).and_wrap_original do |m, attempt, **_| + m.call(attempt, jitter: 1.0) + end + end + + # CSOT time control: + # monotonic_time always returns 100.0 + # timeout_ms: 1 → deadline = 100.001 + # regular backoff attempt 1 = 0.005 s → 100.005 >= 100.001 → exceeds deadline + # overload backoff attempt 1 = 0.1 s → 100.1 >= 100.001 → exceeds deadline + # deadline_expired? at line 514 (after error) = 100.0 >= 100.001 → false (skip) + def with_csot_backoff_time_control + allow(Mongo::Utils).to receive(:monotonic_time).and_return(100.0) + yield + end + + # non-CSOT time control: + # 1st monotonic_time call (deadline setup) → 100.0 ⟹ deadline = 220.0 + # subsequent calls → 219.996 + # deadline_expired? = 219.996 >= 220.0 → false (won't fire before backoff check) + # regular backoff: 219.996 + 0.005 = 220.001 >= 220.0 → exceeds + # overload backoff: 219.996 + 0.1 = 220.096 >= 220.0 → exceeds + def with_non_csot_backoff_time_control + call_count = 0 + allow(Mongo::Utils).to receive(:monotonic_time) do + call_count += 1 + call_count == 1 ? 100.0 : 219.996 + end + yield + end + + def make_transient_error + e = Mongo::Error::OperationFailure.new('transient') + e.add_label('TransientTransactionError') + e + end + + def make_transient_overload_error + e = Mongo::Error::OperationFailure.new('transient overload') + e.add_label('TransientTransactionError') + e.add_label('SystemOverloadedError') + e + end + + def make_commit_overload_error + e = Mongo::Error::OperationFailure.new('commit overload') + e.add_label('UnknownTransactionCommitResult') + e.add_label('SystemOverloadedError') + e + end + + # --- Regular (non-overload) backoff exceeds deadline --- + + context 'when regular backoff would exceed CSOT deadline (CSOT mode)' do + it 'raises TimeoutError with last_error as cause' do + last = make_transient_error + + with_csot_backoff_time_control do + expect do + session.with_transaction(timeout_ms: 1) do + raise last + end + end.to raise_error(Mongo::Error::TimeoutError) do |err| + expect(err.cause).to eq(last) + end + end + end + end + + context 'when regular backoff would exceed the 120 s deadline (non-CSOT mode)' do + it 'raises last_error directly (not TimeoutError)' do + last = make_transient_error + + with_non_csot_backoff_time_control do + expect do + session.with_transaction do + raise last + end + end.to raise_error(Mongo::Error::OperationFailure) do |err| + expect(err).to eq(last) + expect(err).not_to be_a(Mongo::Error::TimeoutError) + end + end + end + end + + # --- Overload backoff exceeds deadline --- + + context 'when overload backoff would exceed CSOT deadline (CSOT mode)' do + it 'raises TimeoutError with last_error as cause' do + last = make_transient_overload_error + + with_csot_backoff_time_control do + expect do + session.with_transaction(timeout_ms: 1) do + raise last + end + end.to raise_error(Mongo::Error::TimeoutError) do |err| + expect(err.cause).to eq(last) + end + end + end + end + + context 'when overload backoff would exceed the 120 s deadline (non-CSOT mode)' do + it 'raises last_error directly (not TimeoutError)' do + last = make_transient_overload_error + + with_non_csot_backoff_time_control do + expect do + session.with_transaction do + raise last + end + end.to raise_error(Mongo::Error::OperationFailure) do |err| + expect(err).to eq(last) + expect(err).not_to be_a(Mongo::Error::TimeoutError) + end + end + end + end + + # --- Commit overload backoff exceeds deadline (CSOT only) --- + + context 'when commit overload backoff would exceed CSOT deadline (CSOT mode)' do + it 'raises TimeoutError with the commit error as cause' do + commit_error = make_commit_overload_error + + allow(session).to receive(:commit_transaction) do + raise commit_error + end + + # All monotonic_time calls return 100.0: + # deadline setup → 100.001 + # pre-commit deadline check → not expired + # post-commit-fail deadline check → not expired (hits backoff path instead) + # commit overload backoff_would_exceed_deadline?(100.001, 0.1) → 100.1 >= 100.001 → true + with_csot_backoff_time_control do + expect do + session.with_transaction(timeout_ms: 1) do + session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_IN_PROGRESS_STATE) + end + end.to raise_error(Mongo::Error::TimeoutError) do |err| + expect(err.cause).to eq(commit_error) + end + end + end + end +end From bf72c2fef82723f735d6252d394cf3e7180abb76 Mon Sep 17 00:00:00 2001 From: Dmitry Rybakov Date: Fri, 27 Mar 2026 10:20:08 +0100 Subject: [PATCH 05/18] Fix rubocop offenses in with_transaction_timeout_spec.rb --- .../session/with_transaction_timeout_spec.rb | 378 ++++++------------ 1 file changed, 127 insertions(+), 251 deletions(-) diff --git a/spec/mongo/session/with_transaction_timeout_spec.rb b/spec/mongo/session/with_transaction_timeout_spec.rb index 3757980da7..2dd5e38ac9 100644 --- a/spec/mongo/session/with_transaction_timeout_spec.rb +++ b/spec/mongo/session/with_transaction_timeout_spec.rb @@ -2,23 +2,16 @@ require 'spec_helper' -# Prose tests for the "Retry Timeout is Enforced" section of the -# transactions-convenient-api spec README. +# Prose tests for the "Retry Timeout is Enforced" and "Backoff Deadline is +# Enforced" sections of the transactions-convenient-api spec README. # # specifications/source/transactions-convenient-api/tests/README.md # -# Three sub-cases must be covered: -# 1. Callback raises TransientTransactionError and timeout is exceeded. -# 2. Commit raises UnknownTransactionCommitResult and timeout is exceeded. -# 3. Commit raises TransientTransactionError and timeout is exceeded. -# # Note 1 from spec: "The error SHOULD be propagated as a timeout error if # the language allows to expose the underlying error as a cause of a timeout # error." Ruby supports this via Exception#cause. -describe 'Mongo::Session#with_transaction Retry Timeout is Enforced' do - let(:retry_policy) do - Mongo::Retryable::RetryPolicy.new(adaptive_retries: false) - end +describe 'Mongo::Session#with_transaction timeout enforcement' do + let(:retry_policy) { Mongo::Retryable::RetryPolicy.new(adaptive_retries: false) } let(:client) do instance_double(Mongo::Client).tap do |c| @@ -44,323 +37,206 @@ allow(session).to receive(:start_transaction) do |*_args| session.instance_variable_set(:@state, Mongo::Session::STARTING_TRANSACTION_STATE) end - allow(session).to receive(:abort_transaction) do session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_ABORTED_STATE) end - allow(session).to receive(:commit_transaction) do session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_COMMITTED_STATE) end - allow(session).to receive(:sleep) end - # Stubs Mongo::Utils.monotonic_time to return a fixed "present" time for the - # first `initial_calls` invocations, then a time far in the future for all - # remaining calls. Combined with timeout_ms: 1 (deadline ≈ present + 0.001 s) - # this makes every deadline check after the Nth call return "expired". - def with_expired_deadline_after(initial_calls:, &block) + # Stubs Mongo::Utils.monotonic_time: first `initial_calls` invocations + # return 100.0 (deadline ≈ 100.001 s with timeout_ms: 1), all subsequent + # calls return 200.0, making every deadline check return "expired". + def with_expired_deadline_after(initial_calls:) call_count = 0 allow(Mongo::Utils).to receive(:monotonic_time) do call_count += 1 - call_count <= initial_calls ? 100.0 : 200.0 + (call_count <= initial_calls) ? 100.0 : 200.0 end - block.call + yield + end + + # CSOT time control: monotonic_time always 100.0. + # With timeout_ms: 1, deadline = 100.001. + # Backoffs (0.005 s, 0.1 s) exceed that deadline; deadline_expired? stays false. + def with_csot_backoff_time_control + allow(Mongo::Utils).to receive(:monotonic_time).and_return(100.0) + allow(Random).to receive(:rand).and_return(1.0) + yield + end + + # non-CSOT time control: first call → 100.0 (deadline = 220.0), + # subsequent calls → 219.996. + # deadline_expired? = false; backoffs (0.005, 0.1) exceed the 220.0 deadline. + def with_non_csot_backoff_time_control + call_count = 0 + allow(Mongo::Utils).to receive(:monotonic_time) do + call_count += 1 + (call_count == 1) ? 100.0 : 219.996 + end + allow(Random).to receive(:rand).and_return(1.0) + yield end def make_transient_error - error = Mongo::Error::OperationFailure.new('transient') - error.add_label('TransientTransactionError') - error + Mongo::Error::OperationFailure.new('transient').tap do |e| + e.add_label('TransientTransactionError') + end end def make_commit_unknown_error - error = Mongo::Error::OperationFailure.new('commit unknown') - error.add_label('UnknownTransactionCommitResult') - error + Mongo::Error::OperationFailure.new('commit unknown').tap do |e| + e.add_label('UnknownTransactionCommitResult') + end end def make_commit_transient_error - error = Mongo::Error::OperationFailure.new('commit transient') - error.add_label('TransientTransactionError') - error + Mongo::Error::OperationFailure.new('commit transient').tap do |e| + e.add_label('TransientTransactionError') + end end def make_transient_overload_error - error = Mongo::Error::OperationFailure.new('transient overload') - error.add_label('TransientTransactionError') - error.add_label('SystemOverloadedError') - error + Mongo::Error::OperationFailure.new('transient overload').tap do |e| + e.add_label('TransientTransactionError') + e.add_label('SystemOverloadedError') + end end def make_commit_overload_error - error = Mongo::Error::OperationFailure.new('commit overload') - error.add_label('UnknownTransactionCommitResult') - error.add_label('SystemOverloadedError') - error - end - - # Sub-case 1: callback raises TransientTransactionError + timeout exceeded - context 'when callback raises TransientTransactionError and retry timeout is exceeded' do - it 'propagates the error as TimeoutError with the transient error as cause' do - transient_error = make_transient_error - call_count = 0 - - # Call 1 → deadline setup (returns 100.0, deadline = 100.001). - # Calls 2+ → 200.0, so deadline_expired? is true for subsequent checks. - with_expired_deadline_after(initial_calls: 1) do - expect do - session.with_transaction(timeout_ms: 1) do - call_count += 1 - raise transient_error - end - end.to raise_error(Mongo::Error::TimeoutError) do |err| - expect(err.cause).to eq(transient_error) - end - end - - expect(call_count).to eq(1) + Mongo::Error::OperationFailure.new('commit overload').tap do |e| + e.add_label('UnknownTransactionCommitResult') + e.add_label('SystemOverloadedError') end end - # Sub-case 2: commit raises UnknownTransactionCommitResult + timeout exceeded - context 'when commit raises UnknownTransactionCommitResult and retry timeout is exceeded' do - it 'propagates the error as TimeoutError with the commit error as cause' do - commit_error = make_commit_unknown_error + # --------------------------------------------------------------------------- + # "Retry Timeout is Enforced" — three sub-cases from the spec README + # --------------------------------------------------------------------------- - allow(session).to receive(:commit_transaction) do - raise commit_error - end + describe '"Retry Timeout is Enforced" prose tests' do + context 'when callback raises TransientTransactionError and retry timeout is exceeded' do + let(:transient_error) { make_transient_error } - # Call 1 → deadline setup (100.0, deadline = 100.001). - # Call 2 → pre-commit CSOT check at line 540 (100.0, not expired → skip). - # Calls 3+ → 200.0, expired → deadline_expired? true inside commit rescue. - with_expired_deadline_after(initial_calls: 2) do - expect do - session.with_transaction(timeout_ms: 1) do - session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_IN_PROGRESS_STATE) - end - end.to raise_error(Mongo::Error::TimeoutError) do |err| - expect(err.cause).to eq(commit_error) + it 'propagates the error as TimeoutError with the transient error as cause' do + with_expired_deadline_after(initial_calls: 1) do + ex = expect { session.with_transaction(timeout_ms: 1) { raise transient_error } } + ex.to raise_error(Mongo::Error::TimeoutError) { |e| expect(e.cause).to eq(transient_error) } end end end - end - # Sub-case 3: commit raises TransientTransactionError + timeout exceeded - context 'when commit raises TransientTransactionError and retry timeout is exceeded' do - it 'propagates the error as TimeoutError with the commit error as cause' do - commit_error = make_commit_transient_error + context 'when commit raises UnknownTransactionCommitResult and retry timeout is exceeded' do + let(:commit_error) { make_commit_unknown_error } - allow(session).to receive(:commit_transaction) do - raise commit_error - end + before { allow(session).to receive(:commit_transaction) { raise commit_error } } - # Same time-control logic as sub-case 2. - with_expired_deadline_after(initial_calls: 2) do - expect do - session.with_transaction(timeout_ms: 1) do - session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_IN_PROGRESS_STATE) + it 'propagates the error as TimeoutError with the commit error as cause' do + with_expired_deadline_after(initial_calls: 2) do + ex = expect do + session.with_transaction(timeout_ms: 1) do + session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_IN_PROGRESS_STATE) + end end - end.to raise_error(Mongo::Error::TimeoutError) do |err| - expect(err.cause).to eq(commit_error) + ex.to raise_error(Mongo::Error::TimeoutError) { |e| expect(e.cause).to eq(commit_error) } end end end - end -end - -# Tests for the "backoff would exceed deadline" check that fires at the top of -# the retry loop (before sleeping). Two bugs exist here: -# 1. In CSOT mode the raised TimeoutError has no .cause (last_error is lost). -# 2. In non-CSOT mode a TimeoutError is raised instead of last_error. -# The same two bugs apply to the commit-overload backoff path. -describe 'Mongo::Session#with_transaction Backoff Deadline is Enforced' do - let(:retry_policy) do - Mongo::Retryable::RetryPolicy.new(adaptive_retries: false) - end - - let(:client) do - instance_double(Mongo::Client).tap do |c| - allow(c).to receive(:retry_policy).and_return(retry_policy) - allow(c).to receive(:timeout_ms).and_return(nil) - end - end - - let(:session) do - sess = Mongo::Session.allocate - sess.instance_variable_set(:@client, client) - sess.instance_variable_set(:@options, {}) - sess.instance_variable_set(:@state, Mongo::Session::NO_TRANSACTION_STATE) - sess.instance_variable_set(:@lock, Mutex.new) - allow(sess).to receive(:check_transactions_supported!).and_return(true) - allow(sess).to receive(:check_if_ended!) - allow(sess).to receive(:log_warn) - allow(sess).to receive(:session_id).and_return(BSON::Document.new('id' => 'test')) - sess - end - - before do - allow(session).to receive(:start_transaction) do |*_args| - session.instance_variable_set(:@state, Mongo::Session::STARTING_TRANSACTION_STATE) - end - allow(session).to receive(:abort_transaction) do - session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_ABORTED_STATE) - end + context 'when commit raises TransientTransactionError and retry timeout is exceeded' do + let(:commit_error) { make_commit_transient_error } - allow(session).to receive(:commit_transaction) do - session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_COMMITTED_STATE) - end + before { allow(session).to receive(:commit_transaction) { raise commit_error } } - allow(session).to receive(:sleep) - - # Fix jitter at 1.0 so backoff values are deterministic. - allow(Random).to receive(:rand).and_return(1.0) - allow(retry_policy).to receive(:backoff_delay).and_wrap_original do |m, attempt, **_| - m.call(attempt, jitter: 1.0) + it 'propagates the error as TimeoutError with the commit error as cause' do + with_expired_deadline_after(initial_calls: 2) do + ex = expect do + session.with_transaction(timeout_ms: 1) do + session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_IN_PROGRESS_STATE) + end + end + ex.to raise_error(Mongo::Error::TimeoutError) { |e| expect(e.cause).to eq(commit_error) } + end + end end end - # CSOT time control: - # monotonic_time always returns 100.0 - # timeout_ms: 1 → deadline = 100.001 - # regular backoff attempt 1 = 0.005 s → 100.005 >= 100.001 → exceeds deadline - # overload backoff attempt 1 = 0.1 s → 100.1 >= 100.001 → exceeds deadline - # deadline_expired? at line 514 (after error) = 100.0 >= 100.001 → false (skip) - def with_csot_backoff_time_control - allow(Mongo::Utils).to receive(:monotonic_time).and_return(100.0) - yield - end + # --------------------------------------------------------------------------- + # "Backoff Deadline is Enforced" — backoff-would-exceed-deadline paths + # --------------------------------------------------------------------------- - # non-CSOT time control: - # 1st monotonic_time call (deadline setup) → 100.0 ⟹ deadline = 220.0 - # subsequent calls → 219.996 - # deadline_expired? = 219.996 >= 220.0 → false (won't fire before backoff check) - # regular backoff: 219.996 + 0.005 = 220.001 >= 220.0 → exceeds - # overload backoff: 219.996 + 0.1 = 220.096 >= 220.0 → exceeds - def with_non_csot_backoff_time_control - call_count = 0 - allow(Mongo::Utils).to receive(:monotonic_time) do - call_count += 1 - call_count == 1 ? 100.0 : 219.996 + describe '"Backoff Deadline is Enforced" prose tests' do + before do + allow(retry_policy).to receive(:backoff_delay).and_wrap_original do |m, attempt, **_| + m.call(attempt, jitter: 1.0) + end end - yield - end - - def make_transient_error - e = Mongo::Error::OperationFailure.new('transient') - e.add_label('TransientTransactionError') - e - end - - def make_transient_overload_error - e = Mongo::Error::OperationFailure.new('transient overload') - e.add_label('TransientTransactionError') - e.add_label('SystemOverloadedError') - e - end - def make_commit_overload_error - e = Mongo::Error::OperationFailure.new('commit overload') - e.add_label('UnknownTransactionCommitResult') - e.add_label('SystemOverloadedError') - e - end - - # --- Regular (non-overload) backoff exceeds deadline --- - - context 'when regular backoff would exceed CSOT deadline (CSOT mode)' do - it 'raises TimeoutError with last_error as cause' do - last = make_transient_error + context 'when regular backoff would exceed CSOT deadline' do + let(:last_error) { make_transient_error } - with_csot_backoff_time_control do - expect do - session.with_transaction(timeout_ms: 1) do - raise last - end - end.to raise_error(Mongo::Error::TimeoutError) do |err| - expect(err.cause).to eq(last) + it 'raises TimeoutError with last_error as cause' do + with_csot_backoff_time_control do + ex = expect { session.with_transaction(timeout_ms: 1) { raise last_error } } + ex.to raise_error(Mongo::Error::TimeoutError) { |e| expect(e.cause).to eq(last_error) } end end end - end - context 'when regular backoff would exceed the 120 s deadline (non-CSOT mode)' do - it 'raises last_error directly (not TimeoutError)' do - last = make_transient_error + context 'when regular backoff would exceed the 120 s deadline (non-CSOT)' do + let(:last_error) { make_transient_error } - with_non_csot_backoff_time_control do - expect do - session.with_transaction do - raise last + it 'raises last_error directly (not TimeoutError)' do + with_non_csot_backoff_time_control do + ex = expect { session.with_transaction { raise last_error } } + ex.to raise_error(Mongo::Error::OperationFailure) do |e| + expect(e).to eq(last_error) + expect(e).not_to be_a(Mongo::Error::TimeoutError) end - end.to raise_error(Mongo::Error::OperationFailure) do |err| - expect(err).to eq(last) - expect(err).not_to be_a(Mongo::Error::TimeoutError) end end end - end - # --- Overload backoff exceeds deadline --- + context 'when overload backoff would exceed CSOT deadline' do + let(:last_error) { make_transient_overload_error } - context 'when overload backoff would exceed CSOT deadline (CSOT mode)' do - it 'raises TimeoutError with last_error as cause' do - last = make_transient_overload_error - - with_csot_backoff_time_control do - expect do - session.with_transaction(timeout_ms: 1) do - raise last - end - end.to raise_error(Mongo::Error::TimeoutError) do |err| - expect(err.cause).to eq(last) + it 'raises TimeoutError with last_error as cause' do + with_csot_backoff_time_control do + ex = expect { session.with_transaction(timeout_ms: 1) { raise last_error } } + ex.to raise_error(Mongo::Error::TimeoutError) { |e| expect(e.cause).to eq(last_error) } end end end - end - context 'when overload backoff would exceed the 120 s deadline (non-CSOT mode)' do - it 'raises last_error directly (not TimeoutError)' do - last = make_transient_overload_error + context 'when overload backoff would exceed the 120 s deadline (non-CSOT)' do + let(:last_error) { make_transient_overload_error } - with_non_csot_backoff_time_control do - expect do - session.with_transaction do - raise last + it 'raises last_error directly (not TimeoutError)' do + with_non_csot_backoff_time_control do + ex = expect { session.with_transaction { raise last_error } } + ex.to raise_error(Mongo::Error::OperationFailure) do |e| + expect(e).to eq(last_error) + expect(e).not_to be_a(Mongo::Error::TimeoutError) end - end.to raise_error(Mongo::Error::OperationFailure) do |err| - expect(err).to eq(last) - expect(err).not_to be_a(Mongo::Error::TimeoutError) end end end - end - - # --- Commit overload backoff exceeds deadline (CSOT only) --- - context 'when commit overload backoff would exceed CSOT deadline (CSOT mode)' do - it 'raises TimeoutError with the commit error as cause' do - commit_error = make_commit_overload_error + context 'when commit overload backoff would exceed CSOT deadline' do + let(:commit_error) { make_commit_overload_error } - allow(session).to receive(:commit_transaction) do - raise commit_error - end + before { allow(session).to receive(:commit_transaction) { raise commit_error } } - # All monotonic_time calls return 100.0: - # deadline setup → 100.001 - # pre-commit deadline check → not expired - # post-commit-fail deadline check → not expired (hits backoff path instead) - # commit overload backoff_would_exceed_deadline?(100.001, 0.1) → 100.1 >= 100.001 → true - with_csot_backoff_time_control do - expect do - session.with_transaction(timeout_ms: 1) do - session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_IN_PROGRESS_STATE) + it 'raises TimeoutError with the commit error as cause' do + with_csot_backoff_time_control do + ex = expect do + session.with_transaction(timeout_ms: 1) do + session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_IN_PROGRESS_STATE) + end end - end.to raise_error(Mongo::Error::TimeoutError) do |err| - expect(err.cause).to eq(commit_error) + ex.to raise_error(Mongo::Error::TimeoutError) { |e| expect(e.cause).to eq(commit_error) } end end end From c7a3ba5a4c2bc7d03b7d164a0dded64cdcde46eb Mon Sep 17 00:00:00 2001 From: Dmitry Rybakov Date: Fri, 27 Mar 2026 10:45:46 +0100 Subject: [PATCH 06/18] Fix session_transaction_spec to use CSOT for timeout test The test expected TimeoutError from a non-CSOT with_transaction call. Per the spec, non-CSOT mode re-raises the original error directly. Add timeout_ms: 5000 to enable CSOT so the test matches its intent. --- spec/mongo/session_transaction_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/mongo/session_transaction_spec.rb b/spec/mongo/session_transaction_spec.rb index 6e153a7fd9..512841b1a3 100644 --- a/spec/mongo/session_transaction_spec.rb +++ b/spec/mongo/session_transaction_spec.rb @@ -136,7 +136,7 @@ class SessionTransactionSpecError < StandardError; end allow(session).to receive('check_transactions_supported!').and_return true expect do - session.with_transaction do + session.with_transaction(timeout_ms: 5000) do exc = Mongo::Error::OperationFailure.new('timeout test') exc.add_label('TransientTransactionError') raise exc From 17d2b40de3b50610d8e5b321f2be48a51a1bc0de Mon Sep 17 00:00:00 2001 From: Dmitry Rybakov Date: Fri, 27 Mar 2026 11:31:35 +0100 Subject: [PATCH 07/18] Simplify stuff --- lib/mongo/session.rb | 42 +++++-------------- .../session/with_transaction_timeout_spec.rb | 24 +++++------ 2 files changed, 23 insertions(+), 43 deletions(-) diff --git a/lib/mongo/session.rb b/lib/mongo/session.rb index 86f7665a9f..eee8f2dc42 100644 --- a/lib/mongo/session.rb +++ b/lib/mongo/session.rb @@ -475,7 +475,7 @@ def with_transaction(options = nil) if overload_encountered delay = @client.retry_policy.backoff_delay(overload_error_count) if backoff_would_exceed_deadline?(deadline, delay) - make_timeout_error_from(last_error) + make_timeout_error_from(last_error, 'CSOT timeout expired waiting to retry withTransaction') end unless @client.retry_policy.should_retry_overload?(overload_error_count, delay) raise(last_error) @@ -484,7 +484,7 @@ def with_transaction(options = nil) else backoff = backoff_seconds_for_retry(transaction_attempt) if backoff_would_exceed_deadline?(deadline, backoff) - make_timeout_error_from(last_error) + make_timeout_error_from(last_error, 'CSOT timeout expired waiting to retry withTransaction') end sleep(backoff) end @@ -513,11 +513,7 @@ def with_transaction(options = nil) if deadline_expired?(deadline) transaction_in_progress = false - if @with_transaction_timeout_ms - raise Mongo::Error::TimeoutError, 'CSOT timeout expired during withTransaction callback' - else - raise - end + make_timeout_error_from(e, 'CSOT timeout expired during withTransaction callback') end if e.is_a?(Mongo::Error) && e.label?('TransientTransactionError') @@ -559,7 +555,7 @@ def with_transaction(options = nil) then transaction_in_progress = false if @with_transaction_timeout_ms && deadline_expired?(deadline) - raise Mongo::Error::TimeoutError, 'CSOT timeout expired during withTransaction commit' + make_timeout_error_from(e, 'CSOT timeout expired during withTransaction commit') else raise end @@ -577,11 +573,7 @@ def with_transaction(options = nil) delay = @client.retry_policy.backoff_delay(overload_error_count) if backoff_would_exceed_deadline?(deadline, delay) transaction_in_progress = false - if @with_transaction_timeout_ms - raise Mongo::Error::TimeoutError, 'CSOT timeout expired during withTransaction commit' - else - raise - end + make_timeout_error_from(e, 'CSOT timeout expired during withTransaction commit') end unless @client.retry_policy.should_retry_overload?(overload_error_count, delay) transaction_in_progress = false @@ -603,11 +595,7 @@ def with_transaction(options = nil) elsif e.label?('TransientTransactionError') if Utils.monotonic_time >= deadline transaction_in_progress = false - if @with_transaction_timeout_ms - raise Mongo::Error::TimeoutError, 'CSOT timeout expired during withTransaction commit' - else - raise - end + make_timeout_error_from(e, 'CSOT timeout expired during withTransaction commit') end last_error = e if e.label?('SystemOverloadedError') @@ -1454,23 +1442,15 @@ def backoff_would_exceed_deadline?(deadline, backoff_seconds) end # Implements makeTimeoutError(lastError) from the transactions-convenient-api spec. - # Called when the withTransaction retry loop cannot continue because backoff would - # exceed the deadline. - # - # - CSOT mode: raises TimeoutError with lastError as Ruby's .cause - # - non-CSOT mode: re-raises lastError directly - # - # Must be called from outside a rescue block; raises unconditionally. - def make_timeout_error_from(last_error) + # In CSOT mode raises TimeoutError with last_error's message included as a substring. + # In non-CSOT mode re-raises last_error directly. + def make_timeout_error_from(last_error, timeout_message) if @with_transaction_timeout_ms - begin - raise last_error - rescue - raise Mongo::Error::TimeoutError, 'CSOT timeout expired waiting to retry withTransaction' - end + raise Mongo::Error::TimeoutError, "#{timeout_message}: #{last_error}" else raise last_error end end + end end diff --git a/spec/mongo/session/with_transaction_timeout_spec.rb b/spec/mongo/session/with_transaction_timeout_spec.rb index 2dd5e38ac9..694a4c6028 100644 --- a/spec/mongo/session/with_transaction_timeout_spec.rb +++ b/spec/mongo/session/with_transaction_timeout_spec.rb @@ -120,10 +120,10 @@ def make_commit_overload_error context 'when callback raises TransientTransactionError and retry timeout is exceeded' do let(:transient_error) { make_transient_error } - it 'propagates the error as TimeoutError with the transient error as cause' do + it 'propagates the error as TimeoutError including the transient error message' do with_expired_deadline_after(initial_calls: 1) do ex = expect { session.with_transaction(timeout_ms: 1) { raise transient_error } } - ex.to raise_error(Mongo::Error::TimeoutError) { |e| expect(e.cause).to eq(transient_error) } + ex.to raise_error(Mongo::Error::TimeoutError) { |e| expect(e.message).to include(transient_error.message) } end end end @@ -133,14 +133,14 @@ def make_commit_overload_error before { allow(session).to receive(:commit_transaction) { raise commit_error } } - it 'propagates the error as TimeoutError with the commit error as cause' do + it 'propagates the error as TimeoutError including the commit error message' do with_expired_deadline_after(initial_calls: 2) do ex = expect do session.with_transaction(timeout_ms: 1) do session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_IN_PROGRESS_STATE) end end - ex.to raise_error(Mongo::Error::TimeoutError) { |e| expect(e.cause).to eq(commit_error) } + ex.to raise_error(Mongo::Error::TimeoutError) { |e| expect(e.message).to include(commit_error.message) } end end end @@ -150,14 +150,14 @@ def make_commit_overload_error before { allow(session).to receive(:commit_transaction) { raise commit_error } } - it 'propagates the error as TimeoutError with the commit error as cause' do + it 'propagates the error as TimeoutError including the commit error message' do with_expired_deadline_after(initial_calls: 2) do ex = expect do session.with_transaction(timeout_ms: 1) do session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_IN_PROGRESS_STATE) end end - ex.to raise_error(Mongo::Error::TimeoutError) { |e| expect(e.cause).to eq(commit_error) } + ex.to raise_error(Mongo::Error::TimeoutError) { |e| expect(e.message).to include(commit_error.message) } end end end @@ -177,10 +177,10 @@ def make_commit_overload_error context 'when regular backoff would exceed CSOT deadline' do let(:last_error) { make_transient_error } - it 'raises TimeoutError with last_error as cause' do + it 'raises TimeoutError including last_error message' do with_csot_backoff_time_control do ex = expect { session.with_transaction(timeout_ms: 1) { raise last_error } } - ex.to raise_error(Mongo::Error::TimeoutError) { |e| expect(e.cause).to eq(last_error) } + ex.to raise_error(Mongo::Error::TimeoutError) { |e| expect(e.message).to include(last_error.message) } end end end @@ -202,10 +202,10 @@ def make_commit_overload_error context 'when overload backoff would exceed CSOT deadline' do let(:last_error) { make_transient_overload_error } - it 'raises TimeoutError with last_error as cause' do + it 'raises TimeoutError including last_error message' do with_csot_backoff_time_control do ex = expect { session.with_transaction(timeout_ms: 1) { raise last_error } } - ex.to raise_error(Mongo::Error::TimeoutError) { |e| expect(e.cause).to eq(last_error) } + ex.to raise_error(Mongo::Error::TimeoutError) { |e| expect(e.message).to include(last_error.message) } end end end @@ -229,14 +229,14 @@ def make_commit_overload_error before { allow(session).to receive(:commit_transaction) { raise commit_error } } - it 'raises TimeoutError with the commit error as cause' do + it 'raises TimeoutError including the commit error message' do with_csot_backoff_time_control do ex = expect do session.with_transaction(timeout_ms: 1) do session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_IN_PROGRESS_STATE) end end - ex.to raise_error(Mongo::Error::TimeoutError) { |e| expect(e.cause).to eq(commit_error) } + ex.to raise_error(Mongo::Error::TimeoutError) { |e| expect(e.message).to include(commit_error.message) } end end end From df7c2f7bea03db588e89f9746a6cdfbe4627cae8 Mon Sep 17 00:00:00 2001 From: Dmitry Rybakov Date: Fri, 27 Mar 2026 13:34:09 +0100 Subject: [PATCH 08/18] Sync with master --- .evergreen/config.yml | 4 ++-- .evergreen/config/standard.yml.erb | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 32591a083e..2d321d0b38 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -1273,7 +1273,7 @@ buildvariants: - matrix_name: CSOT matrix_spec: ruby: "ruby-4.0" - mongodb-version: ["8.0", "rapid"] + mongodb-version: "8.0" topology: replica-set-single-node os: ubuntu2204 display_name: "CSOT - ${mongodb-version}" @@ -1532,7 +1532,7 @@ buildvariants: auth-and-ssl: "noauth-and-nossl" ruby: ["ruby-4.0", "ruby-3.4", "ruby-3.3", "ruby-3.2", "ruby-3.1"] topology: [replica-set, sharded-cluster] - mongodb-version: [ '6.0', '7.0', '8.0', 'rapid' ] + mongodb-version: [ '6.0', '7.0', '8.0' ] os: ubuntu2204 fle: helper display_name: "FLE: ${mongodb-version} ${topology} ${ruby}" diff --git a/.evergreen/config/standard.yml.erb b/.evergreen/config/standard.yml.erb index c960841a61..2dbc222dd0 100644 --- a/.evergreen/config/standard.yml.erb +++ b/.evergreen/config/standard.yml.erb @@ -151,7 +151,7 @@ buildvariants: - matrix_name: CSOT matrix_spec: ruby: <%= latest_ruby %> - mongodb-version: <%= stable_and_rapid %> + mongodb-version: <%= latest_stable_mdb %> topology: replica-set-single-node os: ubuntu2204 display_name: "CSOT - ${mongodb-version}" @@ -350,7 +350,7 @@ buildvariants: auth-and-ssl: "noauth-and-nossl" ruby: <%= supported_mri_rubies_3_ubuntu %> topology: [replica-set, sharded-cluster] - mongodb-version: [ '6.0', '7.0', '8.0', 'rapid' ] + mongodb-version: [ '6.0', '7.0', '8.0' ] os: ubuntu2204 fle: helper display_name: "FLE: ${mongodb-version} ${topology} ${ruby}" From 9ac5b71bd782ed02c1983a2cb90647c581249998 Mon Sep 17 00:00:00 2001 From: Dmitry Rybakov Date: Fri, 27 Mar 2026 14:09:48 +0100 Subject: [PATCH 09/18] Prepare Session interface for WithTransactionRunner extraction --- lib/mongo/session.rb | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/lib/mongo/session.rb b/lib/mongo/session.rb index eee8f2dc42..70fe9436f1 100644 --- a/lib/mongo/session.rb +++ b/lib/mongo/session.rb @@ -1310,6 +1310,30 @@ def inside_with_transaction? @inside_with_transaction end + # Nils the with_transaction deadline, allowing subsequent operations + # (e.g. abort_transaction) to use a fresh timeout rather than an + # already-expired one. Called by WithTransactionRunner. + def clear_with_transaction_deadline! + @with_transaction_deadline = nil + end + + protected + + # Readable by WithTransactionRunner to detect CSOT mode. + attr_reader :with_transaction_timeout_ms + + # Resets transaction state to NO_TRANSACTION_STATE without calling + # abort_transaction. Used by WithTransactionRunner after a + # TransientTransactionError during commit — the server has already + # rolled back, so no server-side cleanup is needed. + def reset_transaction_state! + @state = NO_TRANSACTION_STATE + end + + def within_states?(*states) + states.include?(@state) + end + private # Get the read concern the session will use when starting a transaction. @@ -1327,10 +1351,6 @@ def txn_read_concern txn_options[:read_concern] || @client.read_concern end - def within_states?(*states) - states.include?(@state) - end - def check_if_no_transaction! return unless within_states?(NO_TRANSACTION_STATE) From c158c6f66dd3c003b562f03e2fbed6168bfcf024 Mon Sep 17 00:00:00 2001 From: Dmitry Rybakov Date: Fri, 27 Mar 2026 16:28:47 +0100 Subject: [PATCH 10/18] Add WithTransactionRunner skeleton with leaf methods --- lib/mongo/session.rb | 1 + lib/mongo/session/with_transaction_runner.rb | 121 +++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 lib/mongo/session/with_transaction_runner.rb diff --git a/lib/mongo/session.rb b/lib/mongo/session.rb index 70fe9436f1..55e2c3815b 100644 --- a/lib/mongo/session.rb +++ b/lib/mongo/session.rb @@ -17,6 +17,7 @@ require 'mongo/session/session_pool' require 'mongo/session/server_session' +require 'mongo/session/with_transaction_runner' module Mongo diff --git a/lib/mongo/session/with_transaction_runner.rb b/lib/mongo/session/with_transaction_runner.rb new file mode 100644 index 0000000000..f078ccb729 --- /dev/null +++ b/lib/mongo/session/with_transaction_runner.rb @@ -0,0 +1,121 @@ +# frozen_string_literal: true + +module Mongo + class Session + # Owns the retry loop state and logic for Session#with_transaction. + # + # Control-flow contract: + # return — step succeeded, continue + # raise — unrecoverable error, propagate to caller + # throw :retry — restart the transaction loop from the top + # + # @api private + class WithTransactionRunner + BACKOFF_INITIAL = 0.005 + BACKOFF_MAX = 0.5 + private_constant :BACKOFF_INITIAL, :BACKOFF_MAX + + def initialize(session, options) + @session = session + @options = options + @csot = !session.with_transaction_timeout_ms.nil? + csot_deadline = session.with_transaction_deadline + # Non-CSOT: apply 120-second safety limit. + # CSOT: use computed deadline (0 = infinite when timeout_ms: 0). + @deadline = csot_deadline.nil? ? (Utils.monotonic_time + 120) : csot_deadline + @last_error = nil + @transaction_attempt = 0 + @overload_encountered = false + @overload_error_count = 0 + @transaction_in_progress = false + end + + private + + # -- Deadline helpers -------------------------------------------------- + + def deadline_expired? + @deadline.zero? ? false : Utils.monotonic_time >= @deadline + end + + def backoff_would_exceed_deadline?(secs) + @deadline.zero? ? false : Utils.monotonic_time + secs >= @deadline + end + + # -- Backoff ----------------------------------------------------------- + + def backoff_seconds_for_retry + exponential = BACKOFF_INITIAL * (1.5**(@transaction_attempt - 1)) + Random.rand * [ exponential, BACKOFF_MAX ].min + end + + # -- Timeout error ----------------------------------------------------- + + # Raises TimeoutError (CSOT) or re-raises last_error (non-CSOT). + # @csot mirrors the original `if @with_transaction_timeout_ms` check: + # in Ruby, 0 is truthy, so timeout_ms: 0 (infinite CSOT) sets @csot = true + # just as `if 0` evaluates to true — the semantics are identical. + def make_timeout_error_from(last_error, message) + raise Mongo::Error::TimeoutError, "#{message}: #{last_error}" if @csot + + raise last_error + end + + # -- Overload tracking ------------------------------------------------- + + # Updates @overload_encountered and @overload_error_count. + # Does not raise or return a meaningful value. + def track_overload(err) + if err.label?('SystemOverloadedError') + @overload_encountered = true + @overload_error_count += 1 + elsif @overload_encountered + @overload_error_count += 1 + @session.client.retry_policy.record_non_overload_retry_failure + end + end + + # -- Commit helpers ---------------------------------------------------- + + def build_commit_options + @options ? { write_concern: @options[:write_concern] } : {} + end + + # Escalates write concern to w: :majority for commit retries. + # Note: wtimeout is NOT added here — commit_transaction handles that + # internally when it detects a retry context. + def escalate_write_concern!(opts) + wc = case (v = opts[:write_concern]) + when WriteConcern::Base then v.options + when nil then {} + else v + end + opts[:write_concern] = wc.merge(w: :majority) + end + + # -- Ensure guard ------------------------------------------------------ + + # Called from run's ensure. Aborts if a break/external Timeout escaped + # the loop while a transaction was in progress. + def abort_if_in_progress + return unless @transaction_in_progress + + @session.log_warn( + 'with_transaction callback broke out of with_transaction loop, ' \ + 'aborting transaction' + ) + begin + @session.abort_transaction + rescue Error::OperationFailure::Family, Error::InvalidTransactionOperation + # Ignore — transaction may already be in an inconsistent state. + end + end + + # -- Sleep wrapper (stubbable in tests) -------------------------------- + + def sleep(secs) + Kernel.sleep(secs) + end + end + end +end From 7721537afc0024269866de89e7d2384eace43a80 Mon Sep 17 00:00:00 2001 From: Dmitry Rybakov Date: Fri, 27 Mar 2026 16:29:32 +0100 Subject: [PATCH 11/18] Add pre_retry_backoff to WithTransactionRunner --- lib/mongo/session/with_transaction_runner.rb | 22 ++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/lib/mongo/session/with_transaction_runner.rb b/lib/mongo/session/with_transaction_runner.rb index f078ccb729..86d76f7a33 100644 --- a/lib/mongo/session/with_transaction_runner.rb +++ b/lib/mongo/session/with_transaction_runner.rb @@ -75,6 +75,28 @@ def track_overload(err) end end + # -- Pre-retry backoff ------------------------------------------------- + + # Sleeps before the next transaction attempt, checking the deadline first. + # Two branches: overload path (adaptive) and normal path (exponential). + def pre_retry_backoff + if @overload_encountered + delay = @session.client.retry_policy.backoff_delay(@overload_error_count) + if backoff_would_exceed_deadline?(delay) + make_timeout_error_from(@last_error, 'CSOT timeout expired waiting to retry withTransaction') + end + raise @last_error unless @session.client.retry_policy.should_retry_overload?(@overload_error_count, delay) + + sleep(delay) + else + backoff = backoff_seconds_for_retry + if backoff_would_exceed_deadline?(backoff) + make_timeout_error_from(@last_error, 'CSOT timeout expired waiting to retry withTransaction') + end + sleep(backoff) + end + end + # -- Commit helpers ---------------------------------------------------- def build_commit_options From 0f5ba0541f66f2e6d205e8d333391fc61088d17f Mon Sep 17 00:00:00 2001 From: Dmitry Rybakov Date: Fri, 27 Mar 2026 16:34:16 +0100 Subject: [PATCH 12/18] Add execute_callback and helpers to WithTransactionRunner --- lib/mongo/session/with_transaction_runner.rb | 46 ++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/lib/mongo/session/with_transaction_runner.rb b/lib/mongo/session/with_transaction_runner.rb index 86d76f7a33..2f34e368ca 100644 --- a/lib/mongo/session/with_transaction_runner.rb +++ b/lib/mongo/session/with_transaction_runner.rb @@ -75,6 +75,52 @@ def track_overload(err) end end + # -- execute_callback helpers ------------------------------------------ + + # Aborts the transaction if it is currently active. + # Clears the deadline first if it is already expired so that abort gets + # a fresh timeout rather than the expired one. + def abort_in_progress_transaction(err) + return unless @session.within_states?( + Session::STARTING_TRANSACTION_STATE, + Session::TRANSACTION_IN_PROGRESS_STATE + ) + + @session.log_warn("Aborting transaction due to #{err.class}: #{err}") + @session.clear_with_transaction_deadline! if @csot && deadline_expired? + @session.abort_transaction + @transaction_in_progress = false + end + + # Raises if the deadline has passed. + # In CSOT mode raises TimeoutError; in non-CSOT mode re-raises last_error. + def raise_or_retry_on_deadline!(err) + return unless deadline_expired? + + make_timeout_error_from(err, 'CSOT timeout expired during withTransaction callback') + end + + # Handles the error from the callback. + # Throws :retry for transient errors; re-raises everything else. + def handle_transient_callback_error(err) + raise err unless err.is_a?(Mongo::Error) && err.label?('TransientTransactionError') + + @last_error = err + track_overload(err) + throw :retry + end + + # Runs the user's block; handles errors using the three helpers above. + # rubocop:disable Lint/RescueException + def execute_callback + yield + rescue Exception => e + abort_in_progress_transaction(e) + raise_or_retry_on_deadline!(e) + handle_transient_callback_error(e) + end + # rubocop:enable Lint/RescueException + # -- Pre-retry backoff ------------------------------------------------- # Sleeps before the next transaction attempt, checking the deadline first. From 754cb383fd94d0dc0739bf17e6dac54a01215958 Mon Sep 17 00:00:00 2001 From: Dmitry Rybakov Date: Fri, 27 Mar 2026 16:47:17 +0100 Subject: [PATCH 13/18] Add commit helpers and timeout_ms:0 regression test to WithTransactionRunner --- lib/mongo/session/with_transaction_runner.rb | 116 ++++++++++++++++++ .../session/with_transaction_timeout_spec.rb | 27 ++++ 2 files changed, 143 insertions(+) diff --git a/lib/mongo/session/with_transaction_runner.rb b/lib/mongo/session/with_transaction_runner.rb index 2f34e368ca..0beed74724 100644 --- a/lib/mongo/session/with_transaction_runner.rb +++ b/lib/mongo/session/with_transaction_runner.rb @@ -145,6 +145,122 @@ def pre_retry_backoff # -- Commit helpers ---------------------------------------------------- + # Returns true if the session is no longer in an active transaction state + # (the callback may have aborted or committed the transaction itself). + def transaction_no_longer_active? + return false unless @session.within_states?( + Session::TRANSACTION_ABORTED_STATE, + Session::NO_TRANSACTION_STATE, + Session::TRANSACTION_COMMITTED_STATE + ) + + @transaction_in_progress = false + true + end + + # CSOT-only: aborts and raises TimeoutError if the deadline has expired + # before we even try to commit. + def check_deadline_before_commit! + return unless @csot && deadline_expired? + + @session.clear_with_transaction_deadline! + @session.abort_transaction + @transaction_in_progress = false + raise Mongo::Error::TimeoutError, 'CSOT timeout expired before transaction could be committed' + end + + # Handles a TransientTransactionError raised during commit. + # Raises on deadline; otherwise records state and throws :retry. + # Note: uses deadline_expired? which correctly returns false for deadline 0 + # (timeout_ms: 0 = infinite CSOT), fixing a bug in the original code that + # used `Utils.monotonic_time >= deadline` — true when deadline == 0. + def handle_transient_commit_error(err) + if deadline_expired? + @transaction_in_progress = false + make_timeout_error_from(err, 'CSOT timeout expired during withTransaction commit') + end + @last_error = err + track_overload(err) + @session.reset_transaction_state! + throw :retry + end + + # Raises if the commit deadline or max_time_ms has expired. + def check_unknown_commit_deadline(err) + return unless deadline_expired? || (err.is_a?(Error::OperationFailure::Family) && err.max_time_ms_expired?) + + @transaction_in_progress = false + raise err unless @csot && deadline_expired? + + make_timeout_error_from(err, 'CSOT timeout expired during withTransaction commit') + end + + # Handles the overload path for unknown-commit retries. + # Sleeps and returns normally to let the caller retry with escalated wc. + # Raises or calls make_timeout_error_from if retry is not possible. + def handle_unknown_commit_overload(err) + return unless @overload_encountered + + delay = @session.client.retry_policy.backoff_delay(@overload_error_count) + if backoff_would_exceed_deadline?(delay) + @transaction_in_progress = false + make_timeout_error_from(err, 'CSOT timeout expired during withTransaction commit') + end + unless @session.client.retry_policy.should_retry_overload?(@overload_error_count, delay) + @transaction_in_progress = false + raise err + end + sleep(delay) + end + + # Handles an UnknownTransactionCommitResult error. + # Raises on deadline or max_time_ms expiry. Sleeps and falls through + # for overload-driven retries (caller escalates write concern). + def handle_unknown_commit_result(err) + check_unknown_commit_deadline(err) + track_overload(err) + handle_unknown_commit_overload(err) + end + + # Routes commit errors to the appropriate handler. + def handle_commit_error(err) + if err.label?('UnknownTransactionCommitResult') + handle_unknown_commit_result(err) + elsif err.label?('TransientTransactionError') + handle_transient_commit_error(err) + else + @transaction_in_progress = false + raise err + end + end + + # Inner commit+retry loop. Escalates write concern for retriable paths. + # Error::AuthError < RuntimeError (not < Mongo::Error), so it requires + # its own rescue clause. + def commit_with_escalation(result) + commit_options = build_commit_options + loop do + @session.commit_transaction(commit_options) + @transaction_in_progress = false + return result + rescue Mongo::Error => e + handle_commit_error(e) + escalate_write_concern!(commit_options) + rescue Error::AuthError + @transaction_in_progress = false + raise + end + end + + # Top-level commit. Skips if the block already managed the transaction; + # checks the CSOT deadline; then enters the escalation loop. + def commit(result) + return result if transaction_no_longer_active? + + check_deadline_before_commit! + commit_with_escalation(result) + end + def build_commit_options @options ? { write_concern: @options[:write_concern] } : {} end diff --git a/spec/mongo/session/with_transaction_timeout_spec.rb b/spec/mongo/session/with_transaction_timeout_spec.rb index 694a4c6028..f32e01e3bb 100644 --- a/spec/mongo/session/with_transaction_timeout_spec.rb +++ b/spec/mongo/session/with_transaction_timeout_spec.rb @@ -224,6 +224,33 @@ def make_commit_overload_error end end + # Regression test: the original with_transaction used `Utils.monotonic_time >= deadline` + # for the TransientTransactionError commit check, which is always true when + # deadline == 0 (timeout_ms: 0 = infinite CSOT). WithTransactionRunner fixes + # this with deadline_expired? that guards on @deadline.zero?. + context 'when timeout_ms: 0 (infinite CSOT) and commit raises TransientTransactionError' do + let(:commit_error) { make_commit_transient_error } + let(:success_return_value) { 42 } + + it 'retries and succeeds (does not raise TimeoutError)' do + allow(Mongo::Utils).to receive(:monotonic_time).and_return(0.0) + + call_count = 0 + allow(session).to receive(:commit_transaction) do + call_count += 1 + raise commit_error if call_count == 1 + + session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_COMMITTED_STATE) + end + + result = session.with_transaction(timeout_ms: 0) do + session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_IN_PROGRESS_STATE) + success_return_value + end + expect(result).to eq(success_return_value) + end + end + context 'when commit overload backoff would exceed CSOT deadline' do let(:commit_error) { make_commit_overload_error } From 71c9b70606a9a7c2a3ef39e8e0da7eb232c3bbd5 Mon Sep 17 00:00:00 2001 From: Dmitry Rybakov Date: Fri, 27 Mar 2026 17:13:45 +0100 Subject: [PATCH 14/18] Add run_attempt and run to WithTransactionRunner --- lib/mongo/session/with_transaction_runner.rb | 42 ++++++++++++-------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/lib/mongo/session/with_transaction_runner.rb b/lib/mongo/session/with_transaction_runner.rb index 0beed74724..b7c627c4bf 100644 --- a/lib/mongo/session/with_transaction_runner.rb +++ b/lib/mongo/session/with_transaction_runner.rb @@ -15,6 +15,28 @@ class WithTransactionRunner BACKOFF_MAX = 0.5 private_constant :BACKOFF_INITIAL, :BACKOFF_MAX + # Runs one transaction attempt: pre-backoff, start, callback, commit. + def run_attempt(&block) + pre_retry_backoff if @transaction_attempt.positive? + @session.start_transaction(@options) + @transaction_in_progress = true + @transaction_attempt += 1 + result = execute_callback(&block) + @transaction_in_progress = false + commit(result) + end + + # Outer retry loop. Returns the callback's return value on success. + # The ensure fires only on raise/break — NOT on throw :retry (which is + # caught by catch(:retry) within this method). + def run(&block) + loop do + catch(:retry) { return run_attempt(&block) } + end + ensure + abort_if_in_progress + end + def initialize(session, options) @session = session @options = options @@ -32,8 +54,6 @@ def initialize(session, options) private - # -- Deadline helpers -------------------------------------------------- - def deadline_expired? @deadline.zero? ? false : Utils.monotonic_time >= @deadline end @@ -42,8 +62,6 @@ def backoff_would_exceed_deadline?(secs) @deadline.zero? ? false : Utils.monotonic_time + secs >= @deadline end - # -- Backoff ----------------------------------------------------------- - def backoff_seconds_for_retry exponential = BACKOFF_INITIAL * (1.5**(@transaction_attempt - 1)) Random.rand * [ exponential, BACKOFF_MAX ].min @@ -52,19 +70,14 @@ def backoff_seconds_for_retry # -- Timeout error ----------------------------------------------------- # Raises TimeoutError (CSOT) or re-raises last_error (non-CSOT). - # @csot mirrors the original `if @with_transaction_timeout_ms` check: - # in Ruby, 0 is truthy, so timeout_ms: 0 (infinite CSOT) sets @csot = true - # just as `if 0` evaluates to true — the semantics are identical. + # Note: `0` is truthy in Ruby so timeout_ms:0 (infinite CSOT) → @csot = true. def make_timeout_error_from(last_error, message) raise Mongo::Error::TimeoutError, "#{message}: #{last_error}" if @csot raise last_error end - # -- Overload tracking ------------------------------------------------- - # Updates @overload_encountered and @overload_error_count. - # Does not raise or return a meaningful value. def track_overload(err) if err.label?('SystemOverloadedError') @overload_encountered = true @@ -123,8 +136,7 @@ def execute_callback # -- Pre-retry backoff ------------------------------------------------- - # Sleeps before the next transaction attempt, checking the deadline first. - # Two branches: overload path (adaptive) and normal path (exponential). + # Sleeps before the next attempt; overload path uses adaptive delay, normal path uses exponential. def pre_retry_backoff if @overload_encountered delay = @session.client.retry_policy.backoff_delay(@overload_error_count) @@ -238,7 +250,7 @@ def handle_commit_error(err) # Error::AuthError < RuntimeError (not < Mongo::Error), so it requires # its own rescue clause. def commit_with_escalation(result) - commit_options = build_commit_options + commit_options = @options ? { write_concern: @options[:write_concern] } : {} loop do @session.commit_transaction(commit_options) @transaction_in_progress = false @@ -261,10 +273,6 @@ def commit(result) commit_with_escalation(result) end - def build_commit_options - @options ? { write_concern: @options[:write_concern] } : {} - end - # Escalates write concern to w: :majority for commit retries. # Note: wtimeout is NOT added here — commit_transaction handles that # internally when it detects a retry context. From 33a978a1d5d706e15b2b3da3ce2d7cfefdd5a72f Mon Sep 17 00:00:00 2001 From: Dmitry Rybakov Date: Fri, 27 Mar 2026 17:15:45 +0100 Subject: [PATCH 15/18] Wire with_transaction to WithTransactionRunner --- lib/mongo/session.rb | 177 +----------------- .../session/with_transaction_timeout_spec.rb | 2 +- 2 files changed, 5 insertions(+), 174 deletions(-) diff --git a/lib/mongo/session.rb b/lib/mongo/session.rb index 55e2c3815b..6c0ed57b23 100644 --- a/lib/mongo/session.rb +++ b/lib/mongo/session.rb @@ -458,178 +458,8 @@ def with_transaction(options = nil) @inside_with_transaction = true @with_transaction_timeout_ms = options&.dig(:timeout_ms) || @options[:default_timeout_ms] || @client.timeout_ms @with_transaction_deadline = calculate_with_transaction_deadline(options) - deadline = if @with_transaction_deadline - # CSOT enabled, so we have a customer defined deadline. - @with_transaction_deadline - else - # CSOT not enabled, so we use the default deadline, 120 seconds. - Utils.monotonic_time + 120 - end - transaction_in_progress = false - transaction_attempt = 0 - last_error = nil - overload_error_count = 0 - overload_encountered = false - - loop do - if transaction_attempt > 0 - if overload_encountered - delay = @client.retry_policy.backoff_delay(overload_error_count) - if backoff_would_exceed_deadline?(deadline, delay) - make_timeout_error_from(last_error, 'CSOT timeout expired waiting to retry withTransaction') - end - unless @client.retry_policy.should_retry_overload?(overload_error_count, delay) - raise(last_error) - end - sleep(delay) - else - backoff = backoff_seconds_for_retry(transaction_attempt) - if backoff_would_exceed_deadline?(deadline, backoff) - make_timeout_error_from(last_error, 'CSOT timeout expired waiting to retry withTransaction') - end - sleep(backoff) - end - end - - commit_options = {} - if options - commit_options[:write_concern] = options[:write_concern] - end - start_transaction(options) - transaction_in_progress = true - transaction_attempt += 1 - - begin - rv = yield self - rescue Exception => e - if within_states?(STARTING_TRANSACTION_STATE, TRANSACTION_IN_PROGRESS_STATE) - log_warn("Aborting transaction due to #{e.class}: #{e}") - # CSOT: if the deadline is already expired, clear it so that - # abort_transaction uses a fresh timeout (not the expired deadline). - # If the deadline is not yet expired, keep it so abort uses remaining time. - @with_transaction_deadline = nil if @with_transaction_deadline && deadline_expired?(deadline) - abort_transaction - transaction_in_progress = false - end - - if deadline_expired?(deadline) - transaction_in_progress = false - make_timeout_error_from(e, 'CSOT timeout expired during withTransaction callback') - end - - if e.is_a?(Mongo::Error) && e.label?('TransientTransactionError') - last_error = e - if e.label?('SystemOverloadedError') - overload_encountered = true - overload_error_count += 1 - elsif overload_encountered - overload_error_count += 1 - @client.retry_policy.record_non_overload_retry_failure - end - next - end - - raise - else - if within_states?(TRANSACTION_ABORTED_STATE, NO_TRANSACTION_STATE, TRANSACTION_COMMITTED_STATE) - transaction_in_progress = false - return rv - end - - # CSOT: if the timeout has expired before we can commit, abort the - # transaction instead and raise a client-side timeout error. - if @with_transaction_deadline && deadline_expired?(deadline) - transaction_in_progress = false - @with_transaction_deadline = nil - abort_transaction - raise Mongo::Error::TimeoutError, 'CSOT timeout expired before transaction could be committed' - end - - begin - commit_transaction(commit_options) - transaction_in_progress = false - return rv - rescue Mongo::Error => e - if e.label?('UnknownTransactionCommitResult') - if deadline_expired?(deadline) || - e.is_a?(Error::OperationFailure::Family) && e.max_time_ms_expired? - then - transaction_in_progress = false - if @with_transaction_timeout_ms && deadline_expired?(deadline) - make_timeout_error_from(e, 'CSOT timeout expired during withTransaction commit') - else - raise - end - end - - if e.label?('SystemOverloadedError') - overload_encountered = true - overload_error_count += 1 - elsif overload_encountered - overload_error_count += 1 - @client.retry_policy.record_non_overload_retry_failure - end - - if overload_encountered - delay = @client.retry_policy.backoff_delay(overload_error_count) - if backoff_would_exceed_deadline?(deadline, delay) - transaction_in_progress = false - make_timeout_error_from(e, 'CSOT timeout expired during withTransaction commit') - end - unless @client.retry_policy.should_retry_overload?(overload_error_count, delay) - transaction_in_progress = false - raise - end - sleep(delay) - end - - wc_options = case v = commit_options[:write_concern] - when WriteConcern::Base - v.options - when nil - {} - else - v - end - commit_options[:write_concern] = wc_options.merge(w: :majority) - retry - elsif e.label?('TransientTransactionError') - if Utils.monotonic_time >= deadline - transaction_in_progress = false - make_timeout_error_from(e, 'CSOT timeout expired during withTransaction commit') - end - last_error = e - if e.label?('SystemOverloadedError') - overload_encountered = true - overload_error_count += 1 - elsif overload_encountered - overload_error_count += 1 - @client.retry_policy.record_non_overload_retry_failure - end - @state = NO_TRANSACTION_STATE - next - else - transaction_in_progress = false - raise - end - rescue Error::AuthError - transaction_in_progress = false - raise - end - end - end - - # No official return value, but return true so that in interactive - # use the method hints that it succeeded. - true + WithTransactionRunner.new(self, options).run { yield self } ensure - if transaction_in_progress - log_warn('with_transaction callback broke out of with_transaction loop, aborting transaction') - begin - abort_transaction - rescue Error::OperationFailure::Family, Error::InvalidTransactionOperation - end - end @with_transaction_deadline = nil @with_transaction_timeout_ms = nil @inside_with_transaction = false @@ -1318,19 +1148,20 @@ def clear_with_transaction_deadline! @with_transaction_deadline = nil end - protected - # Readable by WithTransactionRunner to detect CSOT mode. + # @api private attr_reader :with_transaction_timeout_ms # Resets transaction state to NO_TRANSACTION_STATE without calling # abort_transaction. Used by WithTransactionRunner after a # TransientTransactionError during commit — the server has already # rolled back, so no server-side cleanup is needed. + # @api private def reset_transaction_state! @state = NO_TRANSACTION_STATE end + # @api private def within_states?(*states) states.include?(@state) end diff --git a/spec/mongo/session/with_transaction_timeout_spec.rb b/spec/mongo/session/with_transaction_timeout_spec.rb index f32e01e3bb..0cd5124baf 100644 --- a/spec/mongo/session/with_transaction_timeout_spec.rb +++ b/spec/mongo/session/with_transaction_timeout_spec.rb @@ -43,7 +43,7 @@ allow(session).to receive(:commit_transaction) do session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_COMMITTED_STATE) end - allow(session).to receive(:sleep) + allow_any_instance_of(Mongo::Session::WithTransactionRunner).to receive(:sleep) end # Stubs Mongo::Utils.monotonic_time: first `initial_calls` invocations From 81fac05d7ec515668ceebf493793553bbd63305b Mon Sep 17 00:00:00 2001 From: Dmitry Rybakov Date: Fri, 27 Mar 2026 17:16:18 +0100 Subject: [PATCH 16/18] Remove moved private methods and constants from Session --- lib/mongo/session.rb | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/lib/mongo/session.rb b/lib/mongo/session.rb index 6c0ed57b23..ff2bae7100 100644 --- a/lib/mongo/session.rb +++ b/lib/mongo/session.rb @@ -1269,40 +1269,5 @@ def calculate_with_transaction_deadline(opts) end end - def deadline_expired?(deadline) - if deadline.zero? - false - else - Utils.monotonic_time >= deadline - end - end - - # Exponential backoff settings for with_transaction retries. - BACKOFF_INITIAL = 0.005 - BACKOFF_MAX = 0.5 - private_constant :BACKOFF_INITIAL, :BACKOFF_MAX - - def backoff_seconds_for_retry(transaction_attempt) - exponential = BACKOFF_INITIAL * (1.5 ** (transaction_attempt - 1)) - Random.rand * [exponential, BACKOFF_MAX].min - end - - def backoff_would_exceed_deadline?(deadline, backoff_seconds) - return false if deadline.zero? - - Utils.monotonic_time + backoff_seconds >= deadline - end - - # Implements makeTimeoutError(lastError) from the transactions-convenient-api spec. - # In CSOT mode raises TimeoutError with last_error's message included as a substring. - # In non-CSOT mode re-raises last_error directly. - def make_timeout_error_from(last_error, timeout_message) - if @with_transaction_timeout_ms - raise Mongo::Error::TimeoutError, "#{timeout_message}: #{last_error}" - else - raise last_error - end - end - end end From 0042e2922e9a397f2a5886a3fb317afb3ffcddcc Mon Sep 17 00:00:00 2001 From: Dmitry Rybakov Date: Fri, 27 Mar 2026 17:19:45 +0100 Subject: [PATCH 17/18] Fix rubocop offenses in spec: use Kernel.sleep stub, compact test --- .../session/with_transaction_timeout_spec.rb | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/spec/mongo/session/with_transaction_timeout_spec.rb b/spec/mongo/session/with_transaction_timeout_spec.rb index 0cd5124baf..0e77b9fd44 100644 --- a/spec/mongo/session/with_transaction_timeout_spec.rb +++ b/spec/mongo/session/with_transaction_timeout_spec.rb @@ -43,7 +43,7 @@ allow(session).to receive(:commit_transaction) do session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_COMMITTED_STATE) end - allow_any_instance_of(Mongo::Session::WithTransactionRunner).to receive(:sleep) + allow(Kernel).to receive(:sleep) end # Stubs Mongo::Utils.monotonic_time: first `initial_calls` invocations @@ -234,19 +234,14 @@ def make_commit_overload_error it 'retries and succeeds (does not raise TimeoutError)' do allow(Mongo::Utils).to receive(:monotonic_time).and_return(0.0) - - call_count = 0 + calls = 0 allow(session).to receive(:commit_transaction) do - call_count += 1 - raise commit_error if call_count == 1 + calls += 1 + raise commit_error if calls == 1 session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_COMMITTED_STATE) end - - result = session.with_transaction(timeout_ms: 0) do - session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_IN_PROGRESS_STATE) - success_return_value - end + result = session.with_transaction(timeout_ms: 0) { success_return_value } expect(result).to eq(success_return_value) end end From 45237b8010c9eb35bb5dcdf3f2480930b64f71ae Mon Sep 17 00:00:00 2001 From: Dmitry Rybakov Date: Fri, 27 Mar 2026 20:25:34 +0100 Subject: [PATCH 18/18] Fix specs --- spec/mongo/session/with_transaction_overload_spec.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/spec/mongo/session/with_transaction_overload_spec.rb b/spec/mongo/session/with_transaction_overload_spec.rb index 3b1019db08..847ddc4bb5 100644 --- a/spec/mongo/session/with_transaction_overload_spec.rb +++ b/spec/mongo/session/with_transaction_overload_spec.rb @@ -75,13 +75,13 @@ def make_commit_transient_overload_error allow(retry_policy).to receive(:record_non_overload_retry_failure).and_call_original - allow(session).to receive(:sleep) + allow(Kernel).to receive(:sleep) end context 'when callback raises TransientTransactionError with SystemOverloadedError' do it 'uses the new overload backoff' do call_count = 0 - expect(session).to receive(:sleep).with(0.1).once + expect(Kernel).to receive(:sleep).with(0.1).once session.with_transaction do call_count += 1 @@ -93,8 +93,8 @@ def make_commit_transient_overload_error context 'when callback raises TransientTransactionError without SystemOverloadedError' do it 'uses the existing backoff' do call_count = 0 - expect(session).to receive(:sleep).once - expect(session).not_to receive(:sleep).with(0.1) + expect(Kernel).to receive(:sleep).once + expect(Kernel).not_to receive(:sleep).with(0.1) session.with_transaction do call_count += 1 @@ -150,7 +150,7 @@ def make_commit_transient_overload_error end it 'applies overload backoff during commit retry' do - expect(session).to receive(:sleep).once + expect(Kernel).to receive(:sleep).once session.with_transaction do session.instance_variable_set(:@state, Mongo::Session::TRANSACTION_IN_PROGRESS_STATE)