Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 54 additions & 24 deletions packages/core/src/evaluation/loaders/evaluator-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,17 @@ import { resolveFileReference } from './file-resolver.js';
const ANSI_YELLOW = '\u001b[33m';
const ANSI_RESET = '\u001b[0m';

/**
 * Prefix for explicit file references in prompt strings.
 * Consistent with case-file-loader.ts which uses "file://" for test-case file references.
 *
 * Only a prompt string that starts with this prefix is resolved as a file: the
 * prefix is stripped, the remainder is passed to resolveFileReference, and an
 * error is thrown when no file can be resolved. Any other string is kept
 * verbatim as inline prompt text, even if it happens to look like a file name.
 *
 * Usage:
 *   prompt: "file://prompts/grader.md" → explicit file, error if not found
 *   prompt: "grader.md"                → inline text (never resolved as file)
 *   prompt: "Evaluate the response"    → inline text
 */
const PROMPT_FILE_PREFIX = 'file://';

/**
* Normalize evaluator type names from legacy snake_case to internal kebab-case.
* Accepts both forms for backward compatibility:
Expand Down Expand Up @@ -428,14 +439,27 @@ async function parseEvaluatorList(
threshold: thresholdValue,
};
} else {
// llm-grader aggregator
const aggregatorPrompt = asString(rawAggregator.prompt);
// llm-grader aggregator — same file:// prefix logic as evaluator prompts
const rawAggPrompt = asString(rawAggregator.prompt);
let aggregatorPrompt: string | undefined;
let promptPath: string | undefined;

if (aggregatorPrompt) {
const resolved = await resolveFileReference(aggregatorPrompt, searchRoots);
if (resolved.resolvedPath) {
promptPath = path.resolve(resolved.resolvedPath);
if (rawAggPrompt) {
if (rawAggPrompt.startsWith(PROMPT_FILE_PREFIX)) {
// Explicit file reference — error if not found
const fileRef = rawAggPrompt.slice(PROMPT_FILE_PREFIX.length);
aggregatorPrompt = fileRef;
const resolved = await resolveFileReference(fileRef, searchRoots);
if (resolved.resolvedPath) {
promptPath = path.resolve(resolved.resolvedPath);
} else {
throw new Error(
`Composite aggregator in '${evalId}': prompt file not found: ${resolved.displayPath}`,
);
}
} else {
// Bare string — always treat as inline text, no file resolution
aggregatorPrompt = rawAggPrompt;
}
}

Expand Down Expand Up @@ -1144,26 +1168,32 @@ async function parseEvaluatorList(
promptScriptConfig = rawPrompt.config as Record<string, unknown>;
}
} else if (typeof rawPrompt === 'string') {
// Text template prompt (existing behavior)
prompt = rawPrompt;
const resolved = await resolveFileReference(prompt, searchRoots);
if (resolved.resolvedPath) {
promptPath = path.resolve(resolved.resolvedPath);
// Validate custom prompt content upfront - throws error if validation fails
try {
await validateCustomPromptContent(promptPath);
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
// Add context and re-throw for the caller to handle
throw new Error(`Evaluator '${name}' template (${promptPath}): ${message}`);
// Text template prompt — supports explicit file:// prefix for file references.
// "file://prompts/grader.md" → explicit file reference, error if not found
// "grader.md" → inline text (no file resolution)
// "Evaluate the response" → inline text

if (rawPrompt.startsWith(PROMPT_FILE_PREFIX)) {
// Explicit file reference — strip prefix and resolve. Error if not found.
const fileRef = rawPrompt.slice(PROMPT_FILE_PREFIX.length);
prompt = fileRef;
const resolved = await resolveFileReference(fileRef, searchRoots);
if (resolved.resolvedPath) {
promptPath = path.resolve(resolved.resolvedPath);
try {
await validateCustomPromptContent(promptPath);
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
throw new Error(`Evaluator '${name}' template (${promptPath}): ${message}`);
}
} else {
throw new Error(
`Evaluator '${name}' in '${evalId}': prompt file not found: ${resolved.displayPath}`,
);
}
} else {
logWarning(
`Inline prompt used for evaluator '${name}' in '${evalId}' (file not found: ${resolved.displayPath})`,
resolved.attempted.length > 0
? resolved.attempted.map((attempt) => ` Tried: ${attempt}`)
: undefined,
);
// Bare string — always treat as inline text, no file resolution
prompt = rawPrompt;
}
}

Expand Down
58 changes: 58 additions & 0 deletions packages/core/test/evaluation/loaders/evaluator-parser.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1780,3 +1780,61 @@ describe('parseEvaluators - string shorthand in assertions', () => {
expect(evaluators).toBeUndefined();
});
});

// Covers how a string `prompt` on an llm-grader assertion is interpreted:
// a "file://" prefix means an explicit file reference (which must exist),
// while a bare string is always kept as inline prompt text.
describe('parseEvaluators - file:// prefix prompt resolution', () => {
  let tempDir: string;

  // Parse a single llm-grader assertion with the given name and prompt,
  // using the temporary directory as the only search root.
  const parseSinglePrompt = (name: string, prompt: string) =>
    parseEvaluators(
      {
        assertions: [{ name, type: 'llm-grader', prompt }],
      },
      undefined,
      [tempDir],
      'test-1',
    );

  beforeAll(async () => {
    // Fixture: a scratch directory containing one prompt file, grader.md.
    const scratchName = `agentv-test-file-prefix-${Date.now()}`;
    tempDir = path.join(os.tmpdir(), scratchName);
    await mkdir(tempDir, { recursive: true });

    const graderFile = path.join(tempDir, 'grader.md');
    await writeFile(graderFile, 'Evaluate the quality of {{ output }}');
  });

  afterAll(async () => {
    // force: true keeps cleanup from failing if the directory is already gone.
    await rm(tempDir, { recursive: true, force: true });
  });

  it('file:// prefix resolves existing file', async () => {
    const evaluators = await parseSinglePrompt('quality', 'file://grader.md');

    expect(evaluators).toHaveLength(1);
    const graderConfig = evaluators?.[0] as LlmGraderEvaluatorConfig;
    expect(graderConfig.promptPath).toBeTruthy();
    expect(graderConfig.promptPath).toContain('grader.md');
  });

  it('file:// prefix throws when file not found', async () => {
    const pending = parseSinglePrompt('missing', 'file://nonexistent.md');

    await expect(pending).rejects.toThrow(/prompt file not found/);
  });

  it('bare path is always treated as inline text even if file exists', async () => {
    const evaluators = await parseSinglePrompt('quality', 'grader.md');

    expect(evaluators).toHaveLength(1);
    const graderConfig = evaluators?.[0] as LlmGraderEvaluatorConfig;
    // Bare string is inline text — no file resolution, no promptPath
    expect(graderConfig.prompt).toBe('grader.md');
    expect(graderConfig.promptPath).toBeUndefined();
  });
});
Loading