@@ -6103,6 +6103,8 @@ def test_predefined_metric_retry_fail_on_resource_exhausted(
61036103 genai_errors .ClientError (code = 429 , response_json = error_response_json ),
61046104 genai_errors .ClientError (code = 429 , response_json = error_response_json ),
61056105 genai_errors .ClientError (code = 429 , response_json = error_response_json ),
6106+ genai_errors .ClientError (code = 429 , response_json = error_response_json ),
6107+ genai_errors .ClientError (code = 429 , response_json = error_response_json ),
61066108 ]
61076109
61086110 result = _evals_common ._execute_evaluation (
@@ -6111,18 +6113,13 @@ def test_predefined_metric_retry_fail_on_resource_exhausted(
61116113 metrics = [metric ],
61126114 )
61136115
6114- assert mock_private_evaluate_instances .call_count == 3
6115- assert mock_sleep .call_count == 2
6116+ assert mock_private_evaluate_instances .call_count == 5
6117+ assert mock_sleep .call_count == 4
61166118 assert len (result .summary_metrics ) == 1
61176119 summary_metric = result .summary_metrics [0 ]
61186120 assert summary_metric .metric_name == "summarization_quality"
61196121 assert summary_metric .mean_score is None
61206122 assert summary_metric .num_cases_error == 1
6121- assert (
6122- "Judge model resource exhausted after 3 retries"
6123- ) in result .eval_case_results [0 ].response_candidate_results [0 ].metric_results [
6124- "summarization_quality"
6125- ].error_message
61266123
61276124
61286125class TestEvaluationDataset :
@@ -6643,3 +6640,182 @@ def test_create_evaluation_set_with_agent_data(
66436640 candidate_response = candidate_responses [0 ]
66446641 assert candidate_response ["candidate" ] == "test-candidate"
66456642 assert candidate_response ["agent_data" ] == agent_data
6643+
6644+
class TestRateLimiter:
    """Tests for the RateLimiter class in _evals_utils."""

    def test_rate_limiter_init(self):
        """Tests that RateLimiter initializes correctly."""
        limiter = _evals_utils.RateLimiter(rate=10.0)
        # rate=10 QPS implies a minimum interval of 0.1s between events.
        assert limiter.seconds_per_event == pytest.approx(0.1)

    def test_rate_limiter_invalid_rate(self):
        """Tests that RateLimiter raises ValueError for non-positive rate."""
        with pytest.raises(ValueError, match="Rate must be a positive number"):
            _evals_utils.RateLimiter(rate=0)
        with pytest.raises(ValueError, match="Rate must be a positive number"):
            _evals_utils.RateLimiter(rate=-1)

    @mock.patch("time.sleep", return_value=None)
    @mock.patch("time.monotonic")
    def test_rate_limiter_sleep_and_advance(self, mock_monotonic, mock_sleep):
        """Tests that sleep_and_advance properly throttles calls."""
        # With rate=10 (0.1s interval):
        # - __init__ at t=0: _next_allowed = 0.0
        # - first call at t=0: no delay, _next_allowed = 0.1
        # - second call at t=0.01: delay = 0.1 - 0.01 = 0.09
        mock_monotonic.side_effect = [
            0.0,  # __init__: time.monotonic()
            0.0,  # first sleep_and_advance: now
            0.01,  # second sleep_and_advance: now
        ]
        limiter = _evals_utils.RateLimiter(rate=10.0)
        limiter.sleep_and_advance()  # First call - should not sleep.
        limiter.sleep_and_advance()  # Second call - should sleep.
        assert mock_sleep.call_count == 1
        # Verify sleep was called with approximately the right delay.
        sleep_delay = mock_sleep.call_args[0][0]
        assert 0.08 < sleep_delay <= 0.1

    def test_rate_limiter_no_sleep_when_enough_time_passed(self):
        """Tests that no sleep occurs when enough time has passed."""
        import time as real_time

        limiter = _evals_utils.RateLimiter(rate=1000.0)  # Very high rate.
        # With rate=1000, interval is 0.001s - should not sleep noticeably.
        # Use a monotonic clock for the elapsed-time measurement: time.time()
        # is wall-clock and can jump on system clock adjustments, which would
        # make this timing assertion flaky.
        start = real_time.monotonic()
        for _ in range(5):
            limiter.sleep_and_advance()
        elapsed = real_time.monotonic() - start
        # 5 calls at 1000 QPS should take ~0.005s, certainly under 1s.
        assert elapsed < 1.0
6693+
6694+
class TestCallWithRetry:
    """Tests for the shared _call_with_retry helper."""

    @mock.patch("time.sleep", return_value=None)
    def test_call_with_retry_success_on_first_try(self, mock_sleep):
        """Tests that _call_with_retry returns immediately on success."""
        target = mock.Mock(return_value="success")

        outcome = _evals_metric_handlers._call_with_retry(target, "test_metric")

        # No retries and no backoff sleeps on the happy path.
        assert outcome == "success"
        assert target.call_count == 1
        assert mock_sleep.call_count == 0

    @mock.patch("time.sleep", return_value=None)
    def test_call_with_retry_success_after_retries(self, mock_sleep):
        """Tests that _call_with_retry succeeds after transient failures."""
        payload = {"error": {"code": 429, "message": "exhausted"}}
        quota_error = genai_errors.ClientError(code=429, response_json=payload)
        # Fail twice with 429, then succeed on the third attempt.
        target = mock.Mock(side_effect=[quota_error, quota_error, "success"])

        outcome = _evals_metric_handlers._call_with_retry(target, "test_metric")

        assert outcome == "success"
        assert target.call_count == 3
        assert mock_sleep.call_count == 2

    @mock.patch("time.sleep", return_value=None)
    def test_call_with_retry_raises_after_max_retries(self, mock_sleep):
        """Tests that _call_with_retry raises after exhausting retries."""
        payload = {"error": {"code": 429, "message": "exhausted"}}
        # Every attempt raises, so the helper must eventually give up.
        target = mock.Mock(
            side_effect=genai_errors.ClientError(code=429, response_json=payload)
        )

        with pytest.raises(genai_errors.ClientError):
            _evals_metric_handlers._call_with_retry(target, "test_metric")

        assert target.call_count == 5  # _MAX_RETRIES
        assert mock_sleep.call_count == 4

    @mock.patch("time.sleep", return_value=None)
    def test_call_with_retry_retries_on_server_error(self, mock_sleep):
        """Tests retry on 503 ServiceUnavailable (ServerError)."""
        payload = {"error": {"code": 503, "message": "unavailable"}}
        target = mock.Mock(
            side_effect=[
                genai_errors.ServerError(code=503, response_json=payload),
                "success",
            ]
        )

        outcome = _evals_metric_handlers._call_with_retry(target, "test_metric")

        assert outcome == "success"
        assert target.call_count == 2

    @mock.patch("time.sleep", return_value=None)
    def test_call_with_retry_no_retry_on_non_retryable(self, mock_sleep):
        """Tests that non-retryable errors are raised immediately."""
        payload = {"error": {"code": 400, "message": "bad request"}}
        target = mock.Mock(
            side_effect=genai_errors.ClientError(code=400, response_json=payload)
        )

        with pytest.raises(genai_errors.ClientError):
            _evals_metric_handlers._call_with_retry(target, "test_metric")

        # A 400 is a caller bug, not a transient condition: one attempt only.
        assert target.call_count == 1
        assert mock_sleep.call_count == 0
6760+
6761+
class TestComputationMetricRetry:
    """Tests for retry behavior in ComputationMetricHandler."""

    @mock.patch.object(
        _evals_metric_handlers.ComputationMetricHandler,
        "SUPPORTED_COMPUTATION_METRICS",
        frozenset(["bleu"]),
    )
    @mock.patch("time.sleep", return_value=None)
    @mock.patch("vertexai._genai.evals.Evals.evaluate_instances")
    def test_computation_metric_retry_on_resource_exhausted(
        self,
        mock_evaluate_instances,
        mock_sleep,
        mock_api_client_fixture,
    ):
        """Tests that ComputationMetricHandler retries on 429."""
        metric = vertexai_genai_types.Metric(name="bleu")
        eval_df = pd.DataFrame(
            [
                {
                    "prompt": "Test prompt",
                    "response": "Test response",
                    "reference": "Test reference",
                }
            ]
        )
        input_dataset = vertexai_genai_types.EvaluationDataset(
            eval_dataset_df=eval_df
        )

        quota_error_json = {
            "error": {
                "code": 429,
                "message": "Resource exhausted.",
                "status": "RESOURCE_EXHAUSTED",
            }
        }
        bleu_response = mock.MagicMock()
        bleu_response.model_dump.return_value = {
            "bleu_results": {"bleu_metric_values": [{"score": 0.85}]}
        }
        # Two 429 failures followed by a successful BLEU response.
        mock_evaluate_instances.side_effect = [
            genai_errors.ClientError(code=429, response_json=quota_error_json),
            genai_errors.ClientError(code=429, response_json=quota_error_json),
            bleu_response,
        ]

        result = _evals_common._execute_evaluation(
            api_client=mock_api_client_fixture,
            dataset=input_dataset,
            metrics=[metric],
        )

        # Two failures mean two backoff sleeps across three total attempts,
        # and the final successful score still lands in the summary.
        assert mock_evaluate_instances.call_count == 3
        assert mock_sleep.call_count == 2
        summary_metric = result.summary_metrics[0]
        assert summary_metric.metric_name == "bleu"
        assert summary_metric.mean_score == 0.85
0 commit comments