From e7f73dd4f9038b1e7eac1a2d09d718ae3a201598 Mon Sep 17 00:00:00 2001 From: Christopher Date: Wed, 1 Apr 2026 00:01:47 +0000 Subject: [PATCH 1/4] feat(core): retry all provider errors and surface JSON-RPC error details Previously, the retry loop only retried timeout-like errors; other provider failures (e.g. transient API errors, JSON-RPC rejections from ACP SDK) failed immediately with no retry. Now all provider errors are retried up to maxRetries (default 2). Also fixes error serialization: the ACP SDK rejects promises with plain JSON-RPC error objects ({code, message}) instead of Error instances. buildErrorResult used String(error) on these, producing the unhelpful "[object Object]" message. The new extractErrorMessage() helper reads the .message and .code properties from plain objects. Closes #886 Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/core/src/evaluation/orchestrator.ts | 46 ++++++++++------- .../core/test/evaluation/orchestrator.test.ts | 49 ++++++++++++++++++- 2 files changed, 76 insertions(+), 19 deletions(-) diff --git a/packages/core/src/evaluation/orchestrator.ts b/packages/core/src/evaluation/orchestrator.ts index 521c5659..7a5f80e5 100644 --- a/packages/core/src/evaluation/orchestrator.ts +++ b/packages/core/src/evaluation/orchestrator.ts @@ -1610,7 +1610,7 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise; + const parts: string[] = []; + if (typeof obj.message === 'string') { + parts.push(obj.message); + } + if (typeof obj.code === 'number') { + parts.push(`(code ${obj.code})`); + } + if (parts.length > 0) { + return parts.join(' '); + } + } + return String(error); } + function mapChildResults( children?: readonly ChildEvaluatorResult[], ): readonly EvaluatorResult[] | undefined { diff --git a/packages/core/test/evaluation/orchestrator.test.ts b/packages/core/test/evaluation/orchestrator.test.ts index 9031cbdc..06073b14 100644 --- a/packages/core/test/evaluation/orchestrator.test.ts +++ b/packages/core/test/evaluation/orchestrator.test.ts @@ -223,7 +223,7 @@ describe('runTestCase', () => { expect(provider.callIndex).toBe(1); }); - it('retries timeout errors up to maxRetries', async () => { + it('retries provider errors up to maxRetries', async () => { const provider = new SequenceProvider('mock', { errors: [new Error('Request timeout')], responses: [ @@ -244,6 +244,27 @@ describe('runTestCase', () => { expect(result.score).toBeGreaterThan(0); }); + it('retries non-timeout provider errors up to maxRetries', async () => { + const provider = new SequenceProvider('mock', { + errors: [new Error('Provider failure')], + responses: [ + { + output: [{ role: 'assistant', content: 'Add structured logging.' }], + }, + ], + }); + + const result = await runEvalCase({ + evalCase: baseTestCase, + provider, + target: baseTarget, + evaluators: evaluatorRegistry, + maxRetries: 1, + }); + + expect(result.score).toBeGreaterThan(0); + }); + it('returns error result on unrecoverable failure', async () => { const provider = new SequenceProvider('mock', { errors: [new Error('Provider failure')], @@ -266,6 +287,32 @@ describe('runTestCase', () => { expect(result.executionError?.message).toContain('Provider failure'); }); + it('surfaces JSON-RPC error objects with readable messages', async () => { + // Simulates @agentclientprotocol/sdk rejecting with a plain JSON-RPC error object + const jsonRpcError = { code: -32600, message: 'Invalid request' }; + const provider: Provider = { + id: 'mock:jsonrpc', + kind: 'mock' as const, + targetName: 'mock', + async invoke(): Promise { + throw jsonRpcError; + }, + }; + + const result = await runEvalCase({ + evalCase: baseTestCase, + provider, + target: baseTarget, + evaluators: evaluatorRegistry, + }); + + expect(result.score).toBe(0); + expect(result.executionStatus).toBe('execution_error'); + expect(result.error).toContain('Invalid request'); + expect(result.error).toContain('code -32600'); + expect(result.error).not.toContain('[object Object]'); + }); + it('surfaces provider raw.error as evaluation error', async () => { const provider = new SequenceProvider('mock', { responses: [ From f3f6f96fe53c0ff2c20b800f3bb541b227987bd3 Mon Sep 17 00:00:00 2001 From: Christopher Date: Wed, 1 Apr 2026 00:14:17 +0000 Subject: [PATCH 2/4] style: fix extra blank line lint error Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/core/src/evaluation/orchestrator.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/core/src/evaluation/orchestrator.ts b/packages/core/src/evaluation/orchestrator.ts index 7a5f80e5..7b55f6a9 100644 --- a/packages/core/src/evaluation/orchestrator.ts +++ b/packages/core/src/evaluation/orchestrator.ts @@ -2740,7 +2740,6 @@ function extractErrorMessage(error: unknown): string { return String(error); } - function mapChildResults( children?: readonly ChildEvaluatorResult[], ): readonly EvaluatorResult[] | undefined { From 9342f61cd9be4bc3e12d2b7319891454cc568cf4 Mon Sep 17 00:00:00 2001 From: Christopher Date: Wed, 1 Apr 2026 00:27:45 +0000 Subject: [PATCH 3/4] feat(core): add exponential backoff between provider retries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds 2^attempt * 1000ms delay (1s, 2s, 4s, …) capped at 30s between retry attempts, matching promptfoo's approach. Prevents hammering rate-limited APIs with immediate retries. Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/core/src/evaluation/orchestrator.ts | 11 +++++++++ .../core/test/evaluation/orchestrator.test.ts | 24 +++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/packages/core/src/evaluation/orchestrator.ts b/packages/core/src/evaluation/orchestrator.ts index 7b55f6a9..f5ae9a2f 100644 --- a/packages/core/src/evaluation/orchestrator.ts +++ b/packages/core/src/evaluation/orchestrator.ts @@ -1611,6 +1611,8 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + function mapChildResults( children?: readonly ChildEvaluatorResult[], ): readonly EvaluatorResult[] | undefined { diff --git a/packages/core/test/evaluation/orchestrator.test.ts b/packages/core/test/evaluation/orchestrator.test.ts index 06073b14..4249b9fe 100644 --- a/packages/core/test/evaluation/orchestrator.test.ts +++ b/packages/core/test/evaluation/orchestrator.test.ts @@ -265,6 +265,30 @@ describe('runTestCase', () => { expect(result.score).toBeGreaterThan(0); }); + it('applies exponential backoff between retries', async () => { + const provider = new SequenceProvider('mock', { + errors: [new Error('Transient failure')], + responses: [ + { + output: [{ role: 'assistant', content: 'Add structured logging.' }], + }, + ], + }); + + const startMs = Date.now(); + await runEvalCase({ + evalCase: baseTestCase, + provider, + target: baseTarget, + evaluators: evaluatorRegistry, + maxRetries: 1, + }); + const elapsedMs = Date.now() - startMs; + + // First retry has 2^0 * 1000 = 1000ms backoff + expect(elapsedMs).toBeGreaterThanOrEqual(900); + }); + it('returns error result on unrecoverable failure', async () => { const provider = new SequenceProvider('mock', { errors: [new Error('Provider failure')], From 6f5892c9a14b5a65258ec73ad350f021ea88dbfc Mon Sep 17 00:00:00 2001 From: Christopher Date: Wed, 1 Apr 2026 00:36:36 +0000 Subject: [PATCH 4/4] fix(core): respect abort signal during retry backoff + harden error extraction - sleep() now accepts an optional AbortSignal so cancellation isn't blocked during backoff delays - extractErrorMessage() falls back to JSON.stringify for plain objects that lack a string message property, preventing [object Object] Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/core/src/evaluation/orchestrator.ts | 27 ++++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/packages/core/src/evaluation/orchestrator.ts b/packages/core/src/evaluation/orchestrator.ts index f5ae9a2f..11a36ab3 100644 --- a/packages/core/src/evaluation/orchestrator.ts +++ b/packages/core/src/evaluation/orchestrator.ts @@ -1612,7 +1612,7 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise; const parts: string[] = []; - if (typeof obj.message === 'string') { + if (typeof obj.message === 'string' && obj.message) { parts.push(obj.message); } if (typeof obj.code === 'number') { @@ -2738,6 +2738,12 @@ function extractErrorMessage(error: unknown): string { if (parts.length > 0) { return parts.join(' '); } + // Fallback: serialize the object so we never return "[object Object]" + try { + return JSON.stringify(error); + } catch { + // circular reference or other serialization failure + } } return String(error); } @@ -2747,8 +2753,19 @@ function retryBackoffMs(attempt: number): number { return Math.min(2 ** attempt * 1000, 30_000); } -function sleep(ms: number): Promise { - return new Promise((resolve) => setTimeout(resolve, ms)); +function sleep(ms: number, signal?: AbortSignal): Promise { + if (signal?.aborted) return Promise.resolve(); + return new Promise((resolve) => { + const timer = setTimeout(resolve, ms); + signal?.addEventListener( + 'abort', + () => { + clearTimeout(timer); + resolve(); + }, + { once: true }, + ); + }); } function mapChildResults(