@@ -6194,6 +6194,8 @@ def test_predefined_metric_retry_fail_on_resource_exhausted(
61946194 genai_errors .ClientError (code = 429 , response_json = error_response_json ),
61956195 genai_errors .ClientError (code = 429 , response_json = error_response_json ),
61966196 genai_errors .ClientError (code = 429 , response_json = error_response_json ),
6197+ genai_errors .ClientError (code = 429 , response_json = error_response_json ),
6198+ genai_errors .ClientError (code = 429 , response_json = error_response_json ),
61976199 ]
61986200
61996201 result = _evals_common ._execute_evaluation (
@@ -6202,18 +6204,13 @@ def test_predefined_metric_retry_fail_on_resource_exhausted(
62026204 metrics = [metric ],
62036205 )
62046206
6205- assert mock_private_evaluate_instances .call_count == 3
6206- assert mock_sleep .call_count == 2
6207+ assert mock_private_evaluate_instances .call_count == 5
6208+ assert mock_sleep .call_count == 4
62076209 assert len (result .summary_metrics ) == 1
62086210 summary_metric = result .summary_metrics [0 ]
62096211 assert summary_metric .metric_name == "summarization_quality"
62106212 assert summary_metric .mean_score is None
62116213 assert summary_metric .num_cases_error == 1
6212- assert (
6213- "Judge model resource exhausted after 3 retries"
6214- ) in result .eval_case_results [0 ].response_candidate_results [0 ].metric_results [
6215- "summarization_quality"
6216- ].error_message
62176214
62186215
62196216class TestEvaluationDataset :
@@ -6734,3 +6731,182 @@ def test_create_evaluation_set_with_agent_data(
67346731 candidate_response = candidate_responses [0 ]
67356732 assert candidate_response ["candidate" ] == "test-candidate"
67366733 assert candidate_response ["agent_data" ] == agent_data
6734+
6735+
class TestRateLimiter:
    """Unit tests for the RateLimiter class in _evals_utils."""

    def test_rate_limiter_init(self):
        """A rate of 10 QPS translates to 0.1 seconds per event."""
        limiter = _evals_utils.RateLimiter(rate=10.0)
        assert limiter.seconds_per_event == pytest.approx(0.1)

    def test_rate_limiter_invalid_rate(self):
        """Zero and negative rates are rejected with ValueError."""
        for bad_rate in (0, -1):
            with pytest.raises(ValueError, match="Rate must be a positive number"):
                _evals_utils.RateLimiter(rate=bad_rate)

    @mock.patch("time.sleep", return_value=None)
    @mock.patch("time.monotonic")
    def test_rate_limiter_sleep_and_advance(self, mock_monotonic, mock_sleep):
        """sleep_and_advance throttles the second of two closely spaced calls.

        With rate=10 (0.1s interval):
          - __init__ at t=0: _next_allowed = 0.0
          - first call at t=0: no delay, _next_allowed = 0.1
          - second call at t=0.01: delay = 0.1 - 0.01 = 0.09
        """
        mock_monotonic.side_effect = [
            0.0,  # __init__: time.monotonic()
            0.0,  # first sleep_and_advance: now
            0.01,  # second sleep_and_advance: now
        ]
        limiter = _evals_utils.RateLimiter(rate=10.0)
        limiter.sleep_and_advance()  # First call - no throttling expected.
        limiter.sleep_and_advance()  # Second call - must be delayed.
        assert mock_sleep.call_count == 1
        # The requested delay should be close to the 0.09s computed above.
        (delay,), _ = mock_sleep.call_args
        assert 0.08 < delay <= 0.1

    def test_rate_limiter_no_sleep_when_enough_time_passed(self):
        """A very high rate (tiny interval) should not block noticeably."""
        import time as real_time

        # With rate=1000, the interval is 0.001s - throttling is negligible.
        limiter = _evals_utils.RateLimiter(rate=1000.0)
        start = real_time.time()
        for _ in range(5):
            limiter.sleep_and_advance()
        # 5 calls at 1000 QPS should take ~0.005s, certainly under 1s.
        assert real_time.time() - start < 1.0
6785+
class TestCallWithRetry:
    """Unit tests for the shared _call_with_retry helper."""

    @mock.patch("time.sleep", return_value=None)
    def test_call_with_retry_success_on_first_try(self, mock_sleep):
        """An immediately successful call runs once with no backoff sleeps."""
        mock_fn = mock.Mock(return_value="success")
        outcome = _evals_metric_handlers._call_with_retry(mock_fn, "test_metric")
        assert outcome == "success"
        assert mock_fn.call_count == 1
        assert mock_sleep.call_count == 0

    @mock.patch("time.sleep", return_value=None)
    def test_call_with_retry_success_after_retries(self, mock_sleep):
        """Transient 429 failures are retried until the call succeeds."""
        quota_error = {"error": {"code": 429, "message": "exhausted"}}
        mock_fn = mock.Mock(
            side_effect=[
                genai_errors.ClientError(code=429, response_json=quota_error),
                genai_errors.ClientError(code=429, response_json=quota_error),
                "success",
            ]
        )
        outcome = _evals_metric_handlers._call_with_retry(mock_fn, "test_metric")
        assert outcome == "success"
        assert mock_fn.call_count == 3
        assert mock_sleep.call_count == 2

    @mock.patch("time.sleep", return_value=None)
    def test_call_with_retry_raises_after_max_retries(self, mock_sleep):
        """Persistent 429 errors surface once the retry budget is spent."""
        quota_error = {"error": {"code": 429, "message": "exhausted"}}
        mock_fn = mock.Mock(
            side_effect=genai_errors.ClientError(code=429, response_json=quota_error)
        )
        with pytest.raises(genai_errors.ClientError):
            _evals_metric_handlers._call_with_retry(mock_fn, "test_metric")
        assert mock_fn.call_count == 5  # _MAX_RETRIES
        assert mock_sleep.call_count == 4

    @mock.patch("time.sleep", return_value=None)
    def test_call_with_retry_retries_on_server_error(self, mock_sleep):
        """A 503 ServiceUnavailable (ServerError) is also retried."""
        unavailable_error = {"error": {"code": 503, "message": "unavailable"}}
        mock_fn = mock.Mock(
            side_effect=[
                genai_errors.ServerError(code=503, response_json=unavailable_error),
                "success",
            ]
        )
        outcome = _evals_metric_handlers._call_with_retry(mock_fn, "test_metric")
        assert outcome == "success"
        assert mock_fn.call_count == 2

    @mock.patch("time.sleep", return_value=None)
    def test_call_with_retry_no_retry_on_non_retryable(self, mock_sleep):
        """Non-retryable errors (e.g. 400) propagate without any retry."""
        bad_request_error = {"error": {"code": 400, "message": "bad request"}}
        mock_fn = mock.Mock(
            side_effect=genai_errors.ClientError(
                code=400, response_json=bad_request_error
            )
        )
        with pytest.raises(genai_errors.ClientError):
            _evals_metric_handlers._call_with_retry(mock_fn, "test_metric")
        assert mock_fn.call_count == 1
        assert mock_sleep.call_count == 0
6852+
class TestComputationMetricRetry:
    """Tests for retry behavior in ComputationMetricHandler."""

    @mock.patch.object(
        _evals_metric_handlers.ComputationMetricHandler,
        "SUPPORTED_COMPUTATION_METRICS",
        frozenset(["bleu"]),
    )
    @mock.patch("time.sleep", return_value=None)
    @mock.patch("vertexai._genai.evals.Evals.evaluate_instances")
    def test_computation_metric_retry_on_resource_exhausted(
        self,
        mock_evaluate_instances,
        mock_sleep,
        mock_api_client_fixture,
    ):
        """Two 429s followed by a success yield the computed bleu score."""
        # Single-row dataset: one prompt/response/reference triple.
        eval_df = pd.DataFrame(
            {
                "prompt": ["Test prompt"],
                "response": ["Test response"],
                "reference": ["Test reference"],
            }
        )
        input_dataset = vertexai_genai_types.EvaluationDataset(
            eval_dataset_df=eval_df
        )
        metric = vertexai_genai_types.Metric(name="bleu")
        exhausted_json = {
            "error": {
                "code": 429,
                "message": "Resource exhausted.",
                "status": "RESOURCE_EXHAUSTED",
            }
        }
        # The third attempt returns a payload carrying a bleu score of 0.85.
        bleu_response = mock.MagicMock()
        bleu_response.model_dump.return_value = {
            "bleu_results": {"bleu_metric_values": [{"score": 0.85}]}
        }
        mock_evaluate_instances.side_effect = [
            genai_errors.ClientError(code=429, response_json=exhausted_json),
            genai_errors.ClientError(code=429, response_json=exhausted_json),
            bleu_response,
        ]

        result = _evals_common._execute_evaluation(
            api_client=mock_api_client_fixture,
            dataset=input_dataset,
            metrics=[metric],
        )

        # Two failures + one success = three calls, with a backoff sleep
        # between each pair of attempts.
        assert mock_evaluate_instances.call_count == 3
        assert mock_sleep.call_count == 2
        summary_metric = result.summary_metrics[0]
        assert summary_metric.metric_name == "bleu"
        assert summary_metric.mean_score == 0.85
0 commit comments