diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index d934da9..f984803 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -198,21 +198,18 @@ jobs:
 
   publish-compat-shim:
     runs-on: ubuntu-latest
-    needs: build-and-publish
-
-    steps:
+    needs: build-and-publish
+    steps:
       - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Set up Node.js
+        uses: actions/checkout@v4
+      - name: Set up Node.js
         uses: actions/setup-node@v4
         with:
           node-version: '20'
           registry-url: 'https://registry.npmjs.org'
           scope: '@predicatesystems'
-          always-auth: true
-
-      - name: Extract version from tag or input
+          always-auth: true
+      - name: Extract version from tag or input
         id: version
         run: |
           if [ "${{ github.event_name }}" == "release" ]; then
@@ -222,18 +219,15 @@ jobs:
           VERSION="${{ github.event.inputs.version }}"
         fi
         echo "version=$VERSION" >> $GITHUB_OUTPUT
-        echo "Version: $VERSION"
-
-      - name: Sync shim version and runtime dependency
+        echo "Version: $VERSION"
+      - name: Sync shim version and runtime dependency
         run: |
           VERSION="${{ steps.version.outputs.version }}"
           npm pkg set version=$VERSION --prefix compat/sdk-shim
-          npm pkg set dependencies."@predicatesystems/runtime"=$VERSION --prefix compat/sdk-shim
-
-      - name: Publish compatibility shim to npm
+          npm pkg set dependencies."@predicatesystems/runtime"=$VERSION --prefix compat/sdk-shim
+      - name: Publish compatibility shim to npm
         run: |
           cd compat/sdk-shim
           npm publish --access public
         env:
-          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
-
+          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
\ No newline at end of file
diff --git a/src/agents/index.ts b/src/agents/index.ts
new file mode 100644
index 0000000..e1fc63c
--- /dev/null
+++ b/src/agents/index.ts
@@ -0,0 +1,34 @@
+/**
+ * Agents Module
+ *
+ * High-level agent implementations for browser automation.
+ */
+
+// Browser Agent (enterprise features)
+export {
+  PredicateBrowserAgent,
+  type PredicateBrowserAgentConfig,
+  type PermissionRecoveryConfig,
+  type VisionFallbackConfig,
+  type CaptchaConfig,
+} from './browser-agent';
+
+// Planner-Executor Agent (two-tier LLM architecture)
+export {
+  // Configuration
+  type SnapshotEscalationConfig,
+  type RetryConfig,
+  type StepwisePlanningConfig,
+  type PlannerExecutorConfig,
+  ConfigPreset,
+  getConfigPreset,
+  mergeConfig,
+  DEFAULT_CONFIG,
+  // Factory
+  type CreateAgentOptions,
+  type AgentProviders,
+  detectProvider,
+  createProvider,
+  resolveConfig,
+  createPlannerExecutorAgentProviders,
+} from './planner-executor';
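Taken together with the package-root re-export later in this diff (see the src/index.ts hunk), this barrel makes the whole planner-executor surface importable from one place. A minimal sketch, assuming the exports land exactly as listed above:

```typescript
// Sketch only: assumes the re-exports above ship unchanged.
import {
  createPlannerExecutorAgentProviders,
  ConfigPreset,
} from '@predicatesystems/runtime';

const { planner, executor, config } = await createPlannerExecutorAgentProviders({
  plannerModel: 'qwen3:8b',
  executorModel: 'qwen3:4b',
  config: ConfigPreset.LOCAL_SMALL_MODEL,
});
```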
diff --git a/src/agents/planner-executor/agent-factory.ts b/src/agents/planner-executor/agent-factory.ts
new file mode 100644
index 0000000..00c06eb
--- /dev/null
+++ b/src/agents/planner-executor/agent-factory.ts
@@ -0,0 +1,276 @@
+/**
+ * Agent Factory for PlannerExecutorAgent
+ *
+ * Provides convenient factory functions to create agents with sensible defaults,
+ * auto-provider detection, and auto-tracer creation.
+ */
+
+import { LLMProvider, OllamaProvider, OpenAIProvider, AnthropicProvider } from '../../llm-provider';
+import { createTracer, createLocalTracer, Tracer } from '../../tracing';
+import {
+  PlannerExecutorConfig,
+  ConfigPreset,
+  getConfigPreset,
+  mergeConfig,
+  DEFAULT_CONFIG,
+  DeepPartial,
+} from './config';
+
+/**
+ * Options for creating a PlannerExecutorAgent.
+ */
+export interface CreateAgentOptions {
+  /** Model name for planning (e.g., "gpt-4o", "qwen3:8b") */
+  plannerModel: string;
+
+  /** Model name for execution (e.g., "gpt-4o-mini", "qwen3:4b") */
+  executorModel: string;
+
+  /** Provider for planner ("auto", "ollama", "openai", "anthropic") */
+  plannerProvider?: 'auto' | 'ollama' | 'openai' | 'anthropic';
+
+  /** Provider for executor ("auto", "ollama", "openai", "anthropic") */
+  executorProvider?: 'auto' | 'ollama' | 'openai' | 'anthropic';
+
+  /** Ollama server URL (default: http://localhost:11434) */
+  ollamaBaseUrl?: string;
+
+  /** OpenAI API key (defaults to OPENAI_API_KEY env var) */
+  openaiApiKey?: string;
+
+  /** Anthropic API key (defaults to ANTHROPIC_API_KEY env var) */
+  anthropicApiKey?: string;
+
+  /** Configuration preset or partial config */
+  config?: ConfigPreset | string | DeepPartial<PlannerExecutorConfig>;
+
+  /** Run ID for tracing (generates UUID if not provided) */
+  runId?: string;
+
+  /** Whether to auto-create tracer (default: false) */
+  autoTracer?: boolean;
+}
+
+/**
+ * Auto-detect provider from model name.
+ */
+export function detectProvider(model: string): 'openai' | 'anthropic' | 'ollama' {
+  const modelLower = model.toLowerCase();
+
+  // OpenAI models
+  if (
+    modelLower.startsWith('gpt-') ||
+    modelLower.startsWith('o1-') ||
+    modelLower.startsWith('o3-') ||
+    modelLower.startsWith('o4-')
+  ) {
+    return 'openai';
+  }
+
+  // Anthropic models
+  if (modelLower.startsWith('claude-')) {
+    return 'anthropic';
+  }
+
+  // Common Ollama model patterns
+  const ollamaPatterns = ['qwen', 'llama', 'phi', 'mistral', 'gemma', 'deepseek', 'codellama'];
+  if (ollamaPatterns.some(p => modelLower.startsWith(p))) {
+    return 'ollama';
+  }
+
+  // Ollama models typically have "model:tag" format
+  if (model.includes(':')) {
+    return 'ollama';
+  }
+
+  // Default to ollama for unknown models (assume local)
+  return 'ollama';
+}
+
+/**
+ * Create LLM provider based on provider name.
+ */
+export function createProvider(
+  model: string,
+  provider: 'auto' | 'ollama' | 'openai' | 'anthropic',
+  options: {
+    ollamaBaseUrl?: string;
+    openaiApiKey?: string;
+    anthropicApiKey?: string;
+  }
+): LLMProvider {
+  const resolvedProvider = provider === 'auto' ? detectProvider(model) : provider;
+
+  switch (resolvedProvider) {
+    case 'ollama':
+      return new OllamaProvider({
+        model,
+        baseUrl: options.ollamaBaseUrl ?? 'http://localhost:11434',
+      });
+
+    case 'openai': {
+      const apiKey = options.openaiApiKey ?? process.env.OPENAI_API_KEY;
+      if (!apiKey) {
+        throw new Error('OpenAI API key required. Set OPENAI_API_KEY or pass openaiApiKey option.');
+      }
+      return new OpenAIProvider(apiKey, model);
+    }
+
+    case 'anthropic': {
+      const apiKey = options.anthropicApiKey ?? process.env.ANTHROPIC_API_KEY;
+      if (!apiKey) {
+        throw new Error(
+          'Anthropic API key required. Set ANTHROPIC_API_KEY or pass anthropicApiKey option.'
+        );
+      }
+      return new AnthropicProvider(apiKey, model);
+    }
+
+    default:
+      throw new Error(
+        `Unknown provider: ${provider}. Supported: 'auto', 'ollama', 'openai', 'anthropic'`
+      );
+  }
+}
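`detectProvider` falls through in a fixed order: OpenAI prefixes, the `claude-` prefix, known local-model prefixes, then the `model:tag` heuristic, defaulting to Ollama. A short sketch of the resulting mappings (comments show expected return values; the relative import path is illustrative):

```typescript
import { detectProvider, createProvider } from './agent-factory';

detectProvider('gpt-4o');         // 'openai'    (gpt- prefix)
detectProvider('claude-3-opus');  // 'anthropic' (claude- prefix)
detectProvider('qwen3:8b');       // 'ollama'    (known local pattern)
detectProvider('my-finetune:v2'); // 'ollama'    (model:tag fallback)

// 'auto' defers to detectProvider; an explicit provider name skips detection.
const planner = createProvider('qwen3:8b', 'auto', {});
```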
+
+/**
+ * Resolve configuration from preset or partial config.
+ */
+export function resolveConfig(
+  config?: ConfigPreset | string | DeepPartial<PlannerExecutorConfig>
+): PlannerExecutorConfig {
+  if (!config) {
+    return { ...DEFAULT_CONFIG };
+  }
+
+  // String preset name
+  if (typeof config === 'string') {
+    return getConfigPreset(config);
+  }
+
+  // It's a partial config object - merge with defaults
+  return mergeConfig(config);
+}
+
+/**
+ * Result from createPlannerExecutorAgentProviders.
+ *
+ * Note: The full PlannerExecutorAgent is not yet implemented in TypeScript.
+ * This function creates the providers and config that will be used when
+ * the agent is ported.
+ */
+export interface AgentProviders {
+  /** Planner LLM provider */
+  planner: LLMProvider;
+
+  /** Executor LLM provider */
+  executor: LLMProvider;
+
+  /** Resolved configuration */
+  config: PlannerExecutorConfig;
+
+  /** Tracer instance (if autoTracer was enabled) */
+  tracer?: Tracer;
+}
+
+/**
+ * Create providers and configuration for PlannerExecutorAgent.
+ *
+ * This is a helper that creates the LLM providers with auto-detection
+ * and resolves configuration from presets. Use this until the full
+ * PlannerExecutorAgent is ported to TypeScript.
+ *
+ * @example Minimal local Ollama setup
+ * ```typescript
+ * const { planner, executor, config } = await createPlannerExecutorAgentProviders({
+ *   plannerModel: 'qwen3:8b',
+ *   executorModel: 'qwen3:4b',
+ * });
+ * ```
+ *
+ * @example With cloud OpenAI
+ * ```typescript
+ * const { planner, executor, config } = await createPlannerExecutorAgentProviders({
+ *   plannerModel: 'gpt-4o',
+ *   executorModel: 'gpt-4o-mini',
+ *   openaiApiKey: 'sk-...',
+ * });
+ * ```
+ *
+ * @example Mixed cloud planner, local executor
+ * ```typescript
+ * const { planner, executor, config } = await createPlannerExecutorAgentProviders({
+ *   plannerModel: 'gpt-4o',
+ *   plannerProvider: 'openai',
+ *   executorModel: 'qwen3:4b',
+ *   executorProvider: 'ollama',
+ *   openaiApiKey: 'sk-...',
+ * });
+ * ```
+ *
+ * @example With config preset
+ * ```typescript
+ * import { ConfigPreset } from '@predicatesystems/runtime';
+ *
+ * const { planner, executor, config } = await createPlannerExecutorAgentProviders({
+ *   plannerModel: 'qwen3:8b',
+ *   executorModel: 'qwen3:4b',
+ *   config: ConfigPreset.LOCAL_SMALL_MODEL,
+ * });
+ * ```
+ */
+export async function createPlannerExecutorAgentProviders(
+  options: CreateAgentOptions
+): Promise<AgentProviders> {
+  const {
+    plannerModel,
+    executorModel,
+    plannerProvider = 'auto',
+    executorProvider = 'auto',
+    ollamaBaseUrl,
+    openaiApiKey,
+    anthropicApiKey,
+    config,
+    runId,
+    autoTracer = false,
+  } = options;
+
+  // Create providers
+  const planner = createProvider(plannerModel, plannerProvider, {
+    ollamaBaseUrl,
+    openaiApiKey,
+    anthropicApiKey,
+  });
+
+  const executor = createProvider(executorModel, executorProvider, {
+    ollamaBaseUrl,
+    openaiApiKey,
+    anthropicApiKey,
+  });
+
+  // Resolve configuration
+  const resolvedConfig = resolveConfig(config);
+
+  // Create tracer if requested
+  let tracer: Tracer | undefined;
+  if (autoTracer) {
+    const apiKey = process.env.PREDICATE_API_KEY;
+    if (apiKey) {
+      tracer = await createTracer({
+        apiKey,
+        runId,
+        llmModel: `${plannerModel}/${executorModel}`,
+        agentType: 'planner-executor',
+      });
+    } else {
+      tracer = createLocalTracer(runId);
+    }
+  }
+
+  return {
+    planner,
+    executor,
+    config: resolvedConfig,
+    tracer,
+  };
+}
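Until the agent class is ported, the returned planner and executor are ordinary `LLMProvider` instances, so they can be smoke-tested directly. A sketch using the `generate(systemPrompt, userPrompt)` signature shown in the OllamaProvider JSDoc later in this diff; the prompts are placeholders:

```typescript
const { planner, executor, tracer } = await createPlannerExecutorAgentProviders({
  plannerModel: 'qwen3:8b',
  executorModel: 'qwen3:4b',
  autoTracer: true, // cloud tracer if PREDICATE_API_KEY is set, local JSONL otherwise
});

const plan = await planner.generate('You are a planner.', 'Plan: open example.com');
console.log(plan.content);

await tracer?.close(); // explicit close; the process-exit safety net below also covers this
```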
diff --git a/src/agents/planner-executor/config.ts b/src/agents/planner-executor/config.ts
new file mode 100644
index 0000000..2402853
--- /dev/null
+++ b/src/agents/planner-executor/config.ts
@@ -0,0 +1,260 @@
+/**
+ * PlannerExecutorAgent Configuration
+ *
+ * Configuration interfaces and presets for the planner-executor agent architecture.
+ */
+
+/**
+ * Snapshot escalation configuration for reliable element capture.
+ *
+ * When element selection fails, the agent can retry with increasing element limits.
+ */
+export interface SnapshotEscalationConfig {
+  /** Whether escalation is enabled (default: true) */
+  enabled: boolean;
+  /** Starting element limit (default: 50) */
+  limitBase: number;
+  /** Increase per escalation step (default: 25) */
+  limitStep: number;
+  /** Maximum element limit (default: 200) */
+  limitMax: number;
+}
+
+/**
+ * Retry and verification configuration.
+ */
+export interface RetryConfig {
+  /** Verification timeout in milliseconds (default: 10000) */
+  verifyTimeoutMs: number;
+  /** Verification poll interval in milliseconds (default: 500) */
+  verifyPollMs: number;
+  /** Maximum verification attempts (default: 4) */
+  verifyMaxAttempts: number;
+  /** Executor repair attempts on action failure (default: 2) */
+  executorRepairAttempts: number;
+  /** Maximum replan attempts (default: 2) */
+  maxReplans: number;
+}
+
+/**
+ * Stepwise (ReAct-style) planning configuration.
+ */
+export interface StepwisePlanningConfig {
+  /** Maximum steps per run (default: 20) */
+  maxSteps: number;
+  /** Number of recent actions to include in context (default: 5) */
+  actionHistoryLimit: number;
+  /** Whether to include page title/URL in context (default: true) */
+  includePageContext: boolean;
+}
+
+/**
+ * Full configuration for PlannerExecutorAgent.
+ */
+export interface PlannerExecutorConfig {
+  /** Snapshot escalation settings */
+  snapshot: SnapshotEscalationConfig;
+
+  /** Retry and verification settings */
+  retry: RetryConfig;
+
+  /** Stepwise planning settings */
+  stepwise: StepwisePlanningConfig;
+
+  /** Maximum tokens for planner LLM (default: 2048) */
+  plannerMaxTokens: number;
+
+  /** Temperature for planner LLM (default: 0.0) */
+  plannerTemperature: number;
+
+  /** Maximum tokens for executor LLM (default: 96) */
+  executorMaxTokens: number;
+
+  /** Temperature for executor LLM (default: 0.0) */
+  executorTemperature: number;
+
+  /** Whether to check predicates before step execution (default: true) */
+  preStepVerification: boolean;
+
+  /** Whether to enable verbose logging (default: false) */
+  verbose: boolean;
+}
+
+/**
+ * Default configuration values.
+ */
+export const DEFAULT_CONFIG: PlannerExecutorConfig = {
+  snapshot: {
+    enabled: true,
+    limitBase: 50,
+    limitStep: 25,
+    limitMax: 200,
+  },
+  retry: {
+    verifyTimeoutMs: 10000,
+    verifyPollMs: 500,
+    verifyMaxAttempts: 4,
+    executorRepairAttempts: 2,
+    maxReplans: 2,
+  },
+  stepwise: {
+    maxSteps: 20,
+    actionHistoryLimit: 5,
+    includePageContext: true,
+  },
+  plannerMaxTokens: 2048,
+  plannerTemperature: 0.0,
+  executorMaxTokens: 96,
+  executorTemperature: 0.0,
+  preStepVerification: true,
+  verbose: false,
+};
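The snapshot fields above define a linear escalation schedule: `limitBase`, then one `limitStep` more per retry, capped at `limitMax`. A hypothetical helper (not part of this diff) illustrating the limits the defaults would produce:

```typescript
// Hypothetical helper illustrating the documented escalation semantics.
function escalationLimits(cfg: SnapshotEscalationConfig): number[] {
  const limits: number[] = [];
  for (let limit = cfg.limitBase; limit <= cfg.limitMax; limit += cfg.limitStep) {
    limits.push(limit);
  }
  return limits;
}

escalationLimits(DEFAULT_CONFIG.snapshot);
// => [50, 75, 100, 125, 150, 175, 200]
```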
+
+/**
+ * Pre-configured settings for common use cases.
+ */
+export enum ConfigPreset {
+  /** Default balanced configuration */
+  DEFAULT = 'default',
+  /** Optimized for 4B-8B local models (Ollama) */
+  LOCAL_SMALL_MODEL = 'local_small',
+  /** Optimized for high-capability cloud models (GPT-4, Claude) */
+  CLOUD_HIGH_QUALITY = 'cloud_high',
+  /** Minimal retries for rapid development */
+  FAST_ITERATION = 'fast',
+  /** Conservative settings for production reliability */
+  PRODUCTION = 'production',
+}
+
+/**
+ * Get a pre-configured PlannerExecutorConfig for common use cases.
+ *
+ * @param preset - Preset name or ConfigPreset enum value
+ * @returns PlannerExecutorConfig with preset values
+ *
+ * @example
+ * ```typescript
+ * import { getConfigPreset, ConfigPreset } from '@predicatesystems/runtime';
+ *
+ * const config = getConfigPreset(ConfigPreset.LOCAL_SMALL_MODEL);
+ * ```
+ */
+export function getConfigPreset(preset: ConfigPreset | string): PlannerExecutorConfig {
+  // Normalize to string for comparison (enum values are strings)
+  const presetKey: string = typeof preset === 'string' ? preset : (preset as string);
+
+  switch (presetKey) {
+    case ConfigPreset.LOCAL_SMALL_MODEL as string:
+    case 'local_small':
+      // Optimized for local 4B-8B models (Ollama)
+      // - Tighter token limits work better with small models
+      // - More lenient timeouts for slower local inference
+      // - Verbose mode helpful for debugging local model behavior
+      return {
+        ...DEFAULT_CONFIG,
+        snapshot: {
+          enabled: true,
+          limitBase: 60,
+          limitStep: 30,
+          limitMax: 200,
+        },
+        retry: {
+          verifyTimeoutMs: 15000,
+          verifyPollMs: 500,
+          verifyMaxAttempts: 6,
+          executorRepairAttempts: 3,
+          maxReplans: 2,
+        },
+        plannerMaxTokens: 1024,
+        executorMaxTokens: 64,
+        verbose: true,
+      };
+
+    case ConfigPreset.CLOUD_HIGH_QUALITY as string:
+    case 'cloud_high':
+      // Optimized for high-capability cloud models (GPT-4, Claude)
+      // - Higher token limits for more detailed plans
+      // - Faster timeouts (cloud inference is quick)
+      // - Verbose off for cleaner output
+      return {
+        ...DEFAULT_CONFIG,
+        retry: {
+          verifyTimeoutMs: 10000,
+          verifyPollMs: 500,
+          verifyMaxAttempts: 4,
+          executorRepairAttempts: 2,
+          maxReplans: 2,
+        },
+        plannerMaxTokens: 2048,
+        executorMaxTokens: 128,
+        verbose: false,
+      };
+
+    case ConfigPreset.FAST_ITERATION as string:
+    case 'fast':
+      // For rapid development and testing
+      // - Minimal retries to fail fast
+      // - Verbose for debugging
+      return {
+        ...DEFAULT_CONFIG,
+        retry: {
+          verifyTimeoutMs: 5000,
+          verifyPollMs: 500,
+          verifyMaxAttempts: 2,
+          executorRepairAttempts: 1,
+          maxReplans: 1,
+        },
+        plannerMaxTokens: 1024,
+        executorMaxTokens: 64,
+        verbose: true,
+      };
+
+    case ConfigPreset.PRODUCTION as string:
+    case 'production':
+      // Conservative settings for production reliability
+      // - More retries for robustness
+      // - Longer timeouts for edge cases
+      // - No verbose output
+      return {
+        ...DEFAULT_CONFIG,
+        retry: {
+          verifyTimeoutMs: 20000,
+          verifyPollMs: 500,
+          verifyMaxAttempts: 8,
+          executorRepairAttempts: 3,
+          maxReplans: 3,
+        },
+        plannerMaxTokens: 2048,
+        executorMaxTokens: 128,
+        verbose: false,
+      };
+
+    case ConfigPreset.DEFAULT as string:
+    case 'default':
+    default:
+      return { ...DEFAULT_CONFIG };
+  }
+}
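A common way to pick among these presets is a single environment switch; a sketch, assuming the usual NODE_ENV convention:

```typescript
import { ConfigPreset, getConfigPreset } from './config';

const preset =
  process.env.NODE_ENV === 'production'
    ? ConfigPreset.PRODUCTION
    : ConfigPreset.FAST_ITERATION;

const config = getConfigPreset(preset);
// e.g. PRODUCTION raises verifyMaxAttempts to 8 and verifyTimeoutMs to 20000
```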
+
+/**
+ * Deep partial type for nested configuration.
+ */
+export type DeepPartial<T> = {
+  [P in keyof T]?: T[P] extends object ? DeepPartial<T[P]> : T[P];
+};
+
+/**
+ * Merge partial config with defaults.
+ *
+ * @param partial - Partial configuration to merge
+ * @returns Complete PlannerExecutorConfig
+ */
+export function mergeConfig(partial: DeepPartial<PlannerExecutorConfig>): PlannerExecutorConfig {
+  return {
+    ...DEFAULT_CONFIG,
+    ...partial,
+    snapshot: { ...DEFAULT_CONFIG.snapshot, ...(partial.snapshot ?? {}) },
+    retry: { ...DEFAULT_CONFIG.retry, ...(partial.retry ?? {}) },
+    stepwise: { ...DEFAULT_CONFIG.stepwise, ...(partial.stepwise ?? {}) },
+  };
+}
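`mergeConfig` deep-merges only the three known nested sections, so overriding one nested key keeps that section's other defaults. A quick sketch of the resulting values:

```typescript
import { mergeConfig } from './config';

const config = mergeConfig({
  retry: { verifyTimeoutMs: 5000 }, // override one nested key
  verbose: true,                    // override one top-level key
});

config.retry.verifyTimeoutMs;   // 5000 (overridden)
config.retry.verifyMaxAttempts; // 4    (kept from DEFAULT_CONFIG.retry)
config.plannerMaxTokens;        // 2048 (kept from DEFAULT_CONFIG)
```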
diff --git a/src/agents/planner-executor/index.ts b/src/agents/planner-executor/index.ts
new file mode 100644
index 0000000..573ac3a
--- /dev/null
+++ b/src/agents/planner-executor/index.ts
@@ -0,0 +1,33 @@
+/**
+ * PlannerExecutorAgent Module
+ *
+ * Two-tier LLM architecture for browser automation:
+ * - Planner (7B+ model): Generates JSON execution plans
+ * - Executor (3B-7B model): Executes steps with tight prompts
+ *
+ * Note: The full PlannerExecutorAgent class is not yet ported to TypeScript.
+ * This module provides configuration and factory helpers for when it is.
+ */
+
+// Configuration
+export {
+  SnapshotEscalationConfig,
+  RetryConfig,
+  StepwisePlanningConfig,
+  PlannerExecutorConfig,
+  DeepPartial,
+  ConfigPreset,
+  getConfigPreset,
+  mergeConfig,
+  DEFAULT_CONFIG,
+} from './config';
+
+// Factory
+export {
+  CreateAgentOptions,
+  AgentProviders,
+  detectProvider,
+  createProvider,
+  resolveConfig,
+  createPlannerExecutorAgentProviders,
+} from './agent-factory';
diff --git a/src/index.ts b/src/index.ts
index 7a5239c..4ac98ba 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -42,6 +42,7 @@ export {
   LLMResponse,
   LocalLLMProvider,
   LocalVisionLLMProvider,
+  OllamaProvider,
   OpenAIProvider,
   AnthropicProvider,
   GLMProvider,
@@ -107,7 +108,22 @@ export {
   type PermissionRecoveryConfig,
   type VisionFallbackConfig,
   type CaptchaConfig,
-} from './agents/browser-agent';
+  // Planner-Executor Agent (two-tier LLM architecture)
+  type SnapshotEscalationConfig,
+  type RetryConfig,
+  type StepwisePlanningConfig,
+  type PlannerExecutorConfig,
+  ConfigPreset,
+  getConfigPreset,
+  mergeConfig,
+  DEFAULT_CONFIG,
+  type CreateAgentOptions,
+  type AgentProviders,
+  detectProvider,
+  createProvider,
+  resolveConfig,
+  createPlannerExecutorAgentProviders,
+} from './agents';
 export * from './captcha/types';
 export * from './captcha/strategies';
 export * from './tools';
diff --git a/src/llm-provider.ts b/src/llm-provider.ts
index 5acf2e3..4fd7753 100644
--- a/src/llm-provider.ts
+++ b/src/llm-provider.ts
@@ -55,11 +55,11 @@ export abstract class LLMProvider {
    * Override in subclasses that support vision.
    */
-  async generateWithImage(
-    systemPrompt: string,
-    userPrompt: string,
-    imageBase64: string,
-    options: Record<string, unknown> = {}
+  generateWithImage(
+    _systemPrompt: string,
+    _userPrompt: string,
+    _imageBase64: string,
+    _options: Record<string, unknown> = {}
   ): Promise<LLMResponse> {
     throw new Error(
       `${this.constructor.name} does not support vision. ` +
@@ -273,6 +273,89 @@
   }
 }
 
+/**
+ * Ollama Provider - dedicated wrapper for Ollama local LLM server.
+ *
+ * Ollama serves models locally and provides an OpenAI-compatible endpoint at /v1.
+ * This provider wraps LocalLLMProvider with sensible defaults for Ollama.
+ *
+ * @example
+ * ```typescript
+ * import { OllamaProvider } from '@predicatesystems/runtime';
+ *
+ * const llm = new OllamaProvider({ model: 'qwen3:8b' });
+ * const response = await llm.generate('You are helpful', 'Hello!');
+ * console.log(response.content);
+ * ```
+ *
+ * @example Custom base URL
+ * ```typescript
+ * const llm = new OllamaProvider({
+ *   model: 'llama3:8b',
+ *   baseUrl: 'http://192.168.1.100:11434'
+ * });
+ * ```
+ */
+export class OllamaProvider extends LocalLLMProvider {
+  private _ollamaBaseUrl: string;
+  private _ollamaModelName: string;
+
+  constructor(
+    options: { model: string; baseUrl?: string; timeoutMs?: number } = { model: 'qwen3:8b' }
+  ) {
+    const baseUrl = options.baseUrl ?? 'http://localhost:11434';
+    // Ollama serves OpenAI-compatible API at /v1
+    super({
+      model: options.model,
+      baseUrl: `${baseUrl.replace(/\/$/, '')}/v1`,
+      apiKey: 'ollama', // Ollama doesn't require a real API key
+      timeoutMs: options.timeoutMs,
+    });
+    this._ollamaBaseUrl = baseUrl;
+    this._ollamaModelName = options.model;
+  }
+
+  /**
+   * Ollama runs locally.
+   */
+  get isLocal(): boolean {
+    return true;
+  }
+
+  /**
+   * Provider identifier.
+   */
+  get providerName(): string {
+    return 'ollama';
+  }
+
+  /**
+   * Get the Ollama base URL (without /v1 suffix).
+   */
+  get ollamaBaseUrl(): string {
+    return this._ollamaBaseUrl;
+  }
+
+  /**
+   * JSON mode support varies by Ollama model.
+   * Most instruction-tuned models (qwen, llama, mistral) can output JSON
+   * with proper prompting, but native JSON mode is model-dependent.
+   */
+  supportsJsonMode(): boolean {
+    // Conservative default: rely on prompt engineering for JSON
+    return false;
+  }
+
+  /**
+   * Vision support varies by Ollama model.
+   * Models like llava, bakllava, moondream support vision.
+   */
+  supportsVision(): boolean {
+    const modelLower = this._ollamaModelName.toLowerCase();
+    return ['llava', 'bakllava', 'moondream'].some(v => modelLower.includes(v));
+  }
+}
+
 /**
  * OpenAI Provider (GPT-4, GPT-4o, etc.)
  * Requires: npm install openai
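Because `supportsVision()` is derived from the model name and the base `generateWithImage` now throws for non-vision providers, callers should gate on the capability before sending an image. A sketch; `screenshotBase64` stands in for a real base64-encoded PNG:

```typescript
import { OllamaProvider } from './llm-provider';

const llm = new OllamaProvider({ model: 'llava:7b' });
const screenshotBase64 = '...base64 PNG...'; // placeholder

if (llm.supportsVision()) {
  const resp = await llm.generateWithImage('Describe the page.', 'What do you see?', screenshotBase64);
  console.log(resp.content);
} else {
  // Text-only fallback; generateWithImage would throw here.
  const resp = await llm.generate('Describe the page.', 'Summarize the DOM snapshot instead.');
  console.log(resp.content);
}
```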
diff --git a/src/tracing/tracer-factory.ts b/src/tracing/tracer-factory.ts
index 044f4ef..a18ed72 100644
--- a/src/tracing/tracer-factory.ts
+++ b/src/tracing/tracer-factory.ts
@@ -6,6 +6,7 @@
  * PRODUCTION HARDENING:
  * - Recovers orphaned traces from previous crashes on SDK init (Risk #3)
  * - Passes runId to CloudTraceSink for persistent cache naming (Risk #1)
+ * - Auto-closes tracers on process exit to prevent data loss (atexit safety net)
  */
 
 import * as path from 'path';
@@ -19,6 +20,83 @@
 import { Tracer } from './tracer';
 import { CloudTraceSink, SentienceLogger } from './cloud-sink';
 import { JsonlTraceSink } from './jsonl-sink';
 
+// ============================================================================
+// Process Exit Cleanup (atexit safety net)
+// ============================================================================
+
+/**
+ * Global registry of active tracers for process exit cleanup.
+ * Uses Map with tracer ID as key to allow proper cleanup on close.
+ *
+ * Note: We store the tracer directly (not WeakRef) because:
+ * 1. Tracers are explicitly unregistered when close() is called
+ * 2. We need the tracer reference available during process exit
+ * 3. If a tracer is garbage collected without close(), we want to close it on exit
+ */
+const activeTracers: Map<number, Tracer> = new Map();
+let nextTracerId = 0;
+let processExitHandlerRegistered = false;
+
+/**
+ * Cleanup handler called on process exit.
+ * Closes all active tracers to ensure trace data is uploaded to cloud.
+ * This prevents data loss when users forget to call tracer.close().
+ */
+function cleanupTracersOnExit(): void {
+  for (const [tracerId, tracer] of activeTracers.entries()) {
+    if (tracer && !tracer.isClosed()) {
+      try {
+        // Use non-blocking close for exit handler
+        // Note: We can't await in exit handlers, so we do best-effort
+        tracer.close(false).catch(() => {
+          // Best effort - don't throw during exit
+        });
+      } catch {
+        // Best effort - don't throw during exit
+      }
+    }
+    activeTracers.delete(tracerId);
+  }
+}
+
+/**
+ * Register a tracer for automatic cleanup on process exit.
+ * @param tracer - Tracer instance to register
+ */
+function registerTracerForCleanup(tracer: Tracer): void {
+  const tracerId = nextTracerId++;
+  activeTracers.set(tracerId, tracer);
+
+  // Set callback on tracer so it unregisters itself when closed
+  tracer._onCloseCallback = () => {
+    activeTracers.delete(tracerId);
+  };
+
+  // Register process exit handlers on first tracer creation
+  if (!processExitHandlerRegistered) {
+    // Handle normal exit
+    process.on('exit', cleanupTracersOnExit);
+    // Handle Ctrl+C
+    process.on('SIGINT', () => {
+      cleanupTracersOnExit();
+      process.exit(130);
+    });
+    // Handle kill
+    process.on('SIGTERM', () => {
+      cleanupTracersOnExit();
+      process.exit(143);
+    });
+    // Handle uncaught exceptions (best effort)
+    process.on('uncaughtException', error => {
+      console.error('Uncaught exception:', error);
+      cleanupTracersOnExit();
+      process.exit(1);
+    });
+
+    processExitHandlerRegistered = true;
+  }
+}
+
 /**
  * Helper to emit run_start event with available metadata
  */
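With the registry above, explicit `close()` and the exit handlers converge: whichever runs first wins, since `close()` unregisters the tracer and a second `close()` is a no-op (see the tracer.ts hunk below). A sketch of the two paths:

```typescript
import { createLocalTracer } from './tracer-factory';

// Path 1: the caller remembers to close; _onCloseCallback unregisters the tracer.
const tracer = createLocalTracer();
await tracer.close();

// Path 2: close() is forgotten. On 'exit', SIGINT, SIGTERM, or an uncaught
// exception, cleanupTracersOnExit() calls tracer.close(false) best-effort,
// so the trace file or upload is still flushed.
const forgotten = createLocalTracer();
```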
@@ -92,7 +170,7 @@
   let orphanedFiles: string[];
   try {
     orphanedFiles = fs.readdirSync(cacheDir).filter(f => f.endsWith('.jsonl'));
-  } catch (error) {
+  } catch {
     // Silently fail if directory read fails (permissions, etc.)
     return;
   }
@@ -141,7 +219,7 @@
       }
       // Silently skip other failures - don't log errors for orphan recovery
       // These are expected in many scenarios (network issues, invalid API keys, etc.)
-    } catch (error: any) {
+    } catch {
       // Silently skip failures - don't log errors for orphan recovery
       // These are expected in many scenarios (network issues, invalid API keys, etc.)
     }
@@ -189,7 +267,7 @@ function httpPost(
         try {
           const parsed = responseBody ? JSON.parse(responseBody) : {};
           resolve({ status: res.statusCode || 500, data: parsed });
-        } catch (error) {
+        } catch {
           resolve({ status: res.statusCode || 500, data: {} });
         }
       });
@@ -345,6 +423,9 @@ export async function createTracer(options: {
     options.screenshotProcessor
   );
 
+  // Register for process exit cleanup (safety net for forgotten close())
+  registerTracerForCleanup(tracer);
+
   // Auto-emit run_start for complete trace structure
   if (options.autoEmitRunStart !== false) {
     emitRunStart(tracer, options.agentType, options.llmModel, options.goal, options.startUrl);
@@ -383,6 +464,9 @@ export async function createTracer(options: {
   const tracer = new Tracer(runId, new JsonlTraceSink(localPath), options.screenshotProcessor);
 
+  // Register for process exit cleanup (ensures file is properly closed)
+  registerTracerForCleanup(tracer);
+
   // Auto-emit run_start for complete trace structure
   if (options.autoEmitRunStart !== false) {
     emitRunStart(tracer, options.agentType, options.llmModel, options.goal, options.startUrl);
@@ -409,5 +493,10 @@ export function createLocalTracer(runId?: string): Tracer {
   const localPath = path.join(tracesDir, `${traceRunId}.jsonl`);
 
   console.log(`💾 [Sentience] Local tracing: ${localPath}`);
-  return new Tracer(traceRunId, new JsonlTraceSink(localPath));
+  const tracer = new Tracer(traceRunId, new JsonlTraceSink(localPath));
+
+  // Register for process exit cleanup (ensures file is properly closed)
+  registerTracerForCleanup(tracer);
+
+  return tracer;
 }
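The factory changes pair with the `close(blocking)` semantics in the tracer.ts diff that follows: the default `close()` blocks until the sink flushes or uploads, while `close(false)` returns immediately, which is what the exit handler relies on. A sketch:

```typescript
import { createLocalTracer } from './tracer-factory';

const tracer = createLocalTracer('my-run');
await tracer.close();  // default blocking=true: waits for the sink flush
tracer.isClosed();     // true; calling close() again is a no-op

const background = createLocalTracer();
void background.close(false); // non-blocking: best-effort flush in the background
```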
diff --git a/src/tracing/tracer.ts b/src/tracing/tracer.ts
index 7990f24..4b2c081 100644
--- a/src/tracing/tracer.ts
+++ b/src/tracing/tracer.ts
@@ -27,6 +27,11 @@ export class Tracer {
   private stepFailures: number = 0;
   private hasErrors: boolean = false;
 
+  // Cleanup tracking (for process exit safety net)
+  private _closed: boolean = false;
+  // Callback invoked when close() is called (used by tracer-factory for cleanup registry)
+  public _onCloseCallback?: (tracer: Tracer) => void;
+
   /**
    * Create a new Tracer
    * @param runId - Unique run identifier (UUID)
@@ -319,6 +324,21 @@
    * @param blocking - If false, upload happens in background (default: true). Only applies to CloudTraceSink.
    */
   async close(blocking: boolean = true): Promise<void> {
+    // Prevent double-close
+    if (this._closed) {
+      return;
+    }
+    this._closed = true;
+
+    // Notify cleanup registry (unregister from process exit handler)
+    if (this._onCloseCallback) {
+      try {
+        this._onCloseCallback(this);
+      } catch {
+        // Don't let callback errors prevent close
+      }
+    }
+
     // Auto-infer finalStatus if not explicitly set and we have step outcomes
     if (
       this.finalStatus === 'unknown' &&
@@ -335,6 +355,13 @@
     }
   }
 
+  /**
+   * Check if tracer has been closed
+   */
+  isClosed(): boolean {
+    return this._closed;
+  }
+
   /**
    * Get run ID
    */
diff --git a/tests/local-llm-provider.test.ts b/tests/local-llm-provider.test.ts
index a7d582c..1dad157 100644
--- a/tests/local-llm-provider.test.ts
+++ b/tests/local-llm-provider.test.ts
@@ -1,4 +1,4 @@
-import { LocalLLMProvider, LocalVisionLLMProvider } from '../src/llm-provider';
+import { LocalLLMProvider, LocalVisionLLMProvider, OllamaProvider } from '../src/llm-provider';
 
 describe('LocalLLMProvider (OpenAI-compatible)', () => {
   const originalFetch = (globalThis as any).fetch;
@@ -73,3 +73,127 @@ describe('LocalVisionLLMProvider (OpenAI-compatible)', () => {
     );
   });
 });
+
+describe('OllamaProvider', () => {
+  const originalFetch = (globalThis as any).fetch;
+
+  afterEach(() => {
+    (globalThis as any).fetch = originalFetch;
+  });
+
+  it('should extend LocalLLMProvider', () => {
+    const llm = new OllamaProvider({ model: 'qwen3:8b' });
+    expect(llm).toBeInstanceOf(LocalLLMProvider);
+  });
+
+  it('should use default base URL http://localhost:11434', () => {
+    const llm = new OllamaProvider({ model: 'qwen3:8b' });
+    expect(llm.ollamaBaseUrl).toBe('http://localhost:11434');
+  });
+
+  it('should accept custom base URL', () => {
+    const llm = new OllamaProvider({
+      model: 'llama3:8b',
+      baseUrl: 'http://192.168.1.100:11434',
+    });
+    expect(llm.ollamaBaseUrl).toBe('http://192.168.1.100:11434');
+  });
+
+  it('should preserve the raw base URL (trailing slash is stripped only when building the /v1 endpoint)', () => {
+    const llm = new OllamaProvider({
+      model: 'mistral:7b',
+      baseUrl: 'http://localhost:11434/',
+    });
+    expect(llm.ollamaBaseUrl).toBe('http://localhost:11434/');
+  });
+
+  it('should report isLocal as true', () => {
+    const llm = new OllamaProvider({ model: 'qwen3:4b' });
+    expect(llm.isLocal).toBe(true);
+  });
+
+  it('should report providerName as ollama', () => {
+    const llm = new OllamaProvider({ model: 'phi3:mini' });
+    expect(llm.providerName).toBe('ollama');
+  });
+
+  it('should report modelName correctly', () => {
+    const llm = new OllamaProvider({ model: 'qwen3:8b' });
+    expect(llm.modelName).toBe('qwen3:8b');
+  });
+
+  it('should return false for supportsJsonMode (conservative default)', () => {
+    const llm = new OllamaProvider({ model: 'qwen3:8b' });
+    expect(llm.supportsJsonMode()).toBe(false);
+  });
+
+  it('should detect vision support for llava models', () => {
+    const llm = new OllamaProvider({ model: 'llava:7b' });
+    expect(llm.supportsVision()).toBe(true);
+  });
+
+  it('should detect vision support for bakllava models', () => {
+    const llm = new OllamaProvider({ model: 'bakllava:latest' });
+    expect(llm.supportsVision()).toBe(true);
+  });
+
+  it('should detect vision support for moondream models', () => {
+    const llm = new OllamaProvider({ model: 'moondream:1.8b' });
+    expect(llm.supportsVision()).toBe(true);
+  });
+
+  it('should return false for vision on text-only models', () => {
+    expect(new OllamaProvider({ model: 'qwen3:8b' }).supportsVision()).toBe(false);
+    expect(new OllamaProvider({ model: 'llama3:8b' }).supportsVision()).toBe(false);
+    expect(new OllamaProvider({ model: 'mistral:7b' }).supportsVision()).toBe(false);
+  });
+
+  it('should call /v1/chat/completions endpoint', async () => {
+    let capturedUrl: string = '';
+    (globalThis as any).fetch = jest.fn(async (url: string) => {
+      capturedUrl = url;
+      return {
+        ok: true,
+        status: 200,
+        text: async () =>
+          JSON.stringify({
+            model: 'qwen3:8b',
+            choices: [{ message: { content: 'Hello!' } }],
+            usage: { prompt_tokens: 5, completion_tokens: 2, total_tokens: 7 },
+          }),
+      };
+    });
+
+    const llm = new OllamaProvider({ model: 'qwen3:8b' });
+    const resp = await llm.generate('You are helpful', 'Hi');
+
+    expect(resp.content).toBe('Hello!');
+    expect(resp.modelName).toBe('qwen3:8b');
+    expect(capturedUrl).toBe('http://localhost:11434/v1/chat/completions');
+  });
+
+  it('should work with custom base URL in API calls', async () => {
+    let capturedUrl: string = '';
+    (globalThis as any).fetch = jest.fn(async (url: string) => {
+      capturedUrl = url;
+      return {
+        ok: true,
+        status: 200,
+        text: async () =>
+          JSON.stringify({
+            model: 'llama3:8b',
+            choices: [{ message: { content: 'Response' } }],
+            usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+          }),
+      };
+    });
+
+    const llm = new OllamaProvider({
+      model: 'llama3:8b',
+      baseUrl: 'http://remote-server:11434',
+    });
+    await llm.generate('sys', 'user');
+
+    expect(capturedUrl).toBe('http://remote-server:11434/v1/chat/completions');
+  });
+});
diff --git a/tests/planner-executor-config.test.ts b/tests/planner-executor-config.test.ts
new file mode 100644
index 0000000..4ec6763
--- /dev/null
+++ b/tests/planner-executor-config.test.ts
@@ -0,0 +1,225 @@
+import {
+  ConfigPreset,
+  getConfigPreset,
+  mergeConfig,
+  DEFAULT_CONFIG,
+  detectProvider,
+  createProvider,
+  resolveConfig,
+  createPlannerExecutorAgentProviders,
+} from '../src/agents/planner-executor';
+import { OllamaProvider, OpenAIProvider, AnthropicProvider } from '../src/llm-provider';
+
+describe('ConfigPreset', () => {
+  it('should have all expected preset values', () => {
+    expect(ConfigPreset.DEFAULT).toBe('default');
+    expect(ConfigPreset.LOCAL_SMALL_MODEL).toBe('local_small');
+    expect(ConfigPreset.CLOUD_HIGH_QUALITY).toBe('cloud_high');
+    expect(ConfigPreset.FAST_ITERATION).toBe('fast');
+    expect(ConfigPreset.PRODUCTION).toBe('production');
+  });
+});
+
+describe('getConfigPreset', () => {
+  it('should return default config for DEFAULT preset', () => {
+    const config = getConfigPreset(ConfigPreset.DEFAULT);
+    expect(config).toEqual(DEFAULT_CONFIG);
+  });
+
+  it('should return optimized config for LOCAL_SMALL_MODEL', () => {
+    const config = getConfigPreset(ConfigPreset.LOCAL_SMALL_MODEL);
+    expect(config.plannerMaxTokens).toBe(1024);
+    expect(config.executorMaxTokens).toBe(64);
+    expect(config.retry.verifyTimeoutMs).toBe(15000);
+    expect(config.retry.verifyMaxAttempts).toBe(6);
+    expect(config.verbose).toBe(true);
+  });
+
+  it('should return optimized config for CLOUD_HIGH_QUALITY', () => {
+    const config = getConfigPreset(ConfigPreset.CLOUD_HIGH_QUALITY);
+    expect(config.plannerMaxTokens).toBe(2048);
+    expect(config.executorMaxTokens).toBe(128);
+    expect(config.retry.verifyTimeoutMs).toBe(10000);
+    expect(config.verbose).toBe(false);
+  });
+
+  it('should return fast iteration config', () => {
+    const config = getConfigPreset(ConfigPreset.FAST_ITERATION);
+    expect(config.retry.verifyMaxAttempts).toBe(2);
+    expect(config.retry.maxReplans).toBe(1);
+    expect(config.verbose).toBe(true);
+  });
+
+  it('should return production config', () => {
+    const config = getConfigPreset(ConfigPreset.PRODUCTION);
+    expect(config.retry.verifyMaxAttempts).toBe(8);
+    expect(config.retry.verifyTimeoutMs).toBe(20000);
+    expect(config.verbose).toBe(false);
+  });
+
+  it('should accept string preset names', () => {
+    const config = getConfigPreset('local_small');
+    expect(config.plannerMaxTokens).toBe(1024);
+  });
+});
+
+describe('mergeConfig', () => {
+  it('should merge partial config with defaults', () => {
+    const config = mergeConfig({ verbose: true });
+    expect(config.verbose).toBe(true);
+    expect(config.plannerMaxTokens).toBe(DEFAULT_CONFIG.plannerMaxTokens);
+  });
+
+  it('should merge nested config objects', () => {
+    const config = mergeConfig({
+      retry: { verifyTimeoutMs: 5000 },
+    });
+    expect(config.retry.verifyTimeoutMs).toBe(5000);
+    expect(config.retry.verifyMaxAttempts).toBe(DEFAULT_CONFIG.retry.verifyMaxAttempts);
+  });
+});
+
+describe('detectProvider', () => {
+  it('should detect OpenAI for GPT models', () => {
+    expect(detectProvider('gpt-4o')).toBe('openai');
+    expect(detectProvider('gpt-4-turbo')).toBe('openai');
+    expect(detectProvider('gpt-4o-mini')).toBe('openai');
+    expect(detectProvider('GPT-4o')).toBe('openai');
+  });
+
+  it('should detect OpenAI for o1/o3/o4 models', () => {
+    expect(detectProvider('o1-preview')).toBe('openai');
+    expect(detectProvider('o1-mini')).toBe('openai');
+    expect(detectProvider('o3-mini')).toBe('openai');
+  });
+
+  it('should detect Anthropic for Claude models', () => {
+    expect(detectProvider('claude-3-opus-20240229')).toBe('anthropic');
+    expect(detectProvider('claude-3-5-sonnet-20241022')).toBe('anthropic');
+    expect(detectProvider('Claude-3-Opus')).toBe('anthropic');
+  });
+
+  it('should detect Ollama for common local models', () => {
+    expect(detectProvider('qwen3:8b')).toBe('ollama');
+    expect(detectProvider('llama3:8b')).toBe('ollama');
+    expect(detectProvider('phi3:mini')).toBe('ollama');
+    expect(detectProvider('mistral:7b')).toBe('ollama');
+    expect(detectProvider('gemma:2b')).toBe('ollama');
+    expect(detectProvider('deepseek:6.7b')).toBe('ollama');
+    expect(detectProvider('codellama:7b')).toBe('ollama');
+  });
+
+  it('should detect Ollama for model:tag format', () => {
+    expect(detectProvider('custom-model:latest')).toBe('ollama');
+    expect(detectProvider('my-finetuned:v2')).toBe('ollama');
+  });
+
+  it('should default to Ollama for unknown models', () => {
+    expect(detectProvider('some-unknown-model')).toBe('ollama');
+  });
+});
+
+describe('createProvider', () => {
+  it('should create OllamaProvider for ollama', () => {
+    const provider = createProvider('qwen3:8b', 'ollama', {});
+    expect(provider).toBeInstanceOf(OllamaProvider);
+    expect(provider.modelName).toBe('qwen3:8b');
+  });
+
+  it('should create OllamaProvider with custom base URL', () => {
+    const provider = createProvider('llama3:8b', 'ollama', {
+      ollamaBaseUrl: 'http://192.168.1.100:11434',
+    }) as OllamaProvider;
+    expect(provider.ollamaBaseUrl).toBe('http://192.168.1.100:11434');
+  });
+
+  it('should create OpenAIProvider for openai', () => {
+    const provider = createProvider('gpt-4o', 'openai', {
+      openaiApiKey: 'test-key',
+    });
+    expect(provider).toBeInstanceOf(OpenAIProvider);
+    expect(provider.modelName).toBe('gpt-4o');
+  });
+
+  it('should create AnthropicProvider for anthropic', () => {
+    const provider = createProvider('claude-3-opus-20240229', 'anthropic', {
+      anthropicApiKey: 'test-key',
+    });
+    expect(provider).toBeInstanceOf(AnthropicProvider);
+    expect(provider.modelName).toBe('claude-3-opus-20240229');
+  });
+
+  it('should auto-detect provider', () => {
+    const provider = createProvider('qwen3:8b', 'auto', {});
+    expect(provider).toBeInstanceOf(OllamaProvider);
+  });
+
+  it('should throw for unknown provider', () => {
+    expect(() => {
+      createProvider('test', 'invalid' as any, {});
+    }).toThrow(/Unknown provider/);
+  });
+});
+
+describe('resolveConfig', () => {
+  it('should return default config when undefined', () => {
+    const config = resolveConfig();
+    expect(config).toEqual(DEFAULT_CONFIG);
+  });
+
+  it('should resolve string preset', () => {
+    const config = resolveConfig('local_small');
+    expect(config.plannerMaxTokens).toBe(1024);
+  });
+
+  it('should resolve ConfigPreset enum', () => {
+    const config = resolveConfig(ConfigPreset.PRODUCTION);
+    expect(config.retry.verifyMaxAttempts).toBe(8);
+  });
+
+  it('should merge partial config', () => {
+    const config = resolveConfig({ verbose: true });
+    expect(config.verbose).toBe(true);
+    expect(config.plannerMaxTokens).toBe(DEFAULT_CONFIG.plannerMaxTokens);
+  });
+});
+
+describe('createPlannerExecutorAgentProviders', () => {
+  it('should create providers with minimal config', async () => {
+    const result = await createPlannerExecutorAgentProviders({
+      plannerModel: 'qwen3:8b',
+      executorModel: 'qwen3:4b',
+    });
+
+    expect(result.planner).toBeInstanceOf(OllamaProvider);
+    expect(result.executor).toBeInstanceOf(OllamaProvider);
+    expect(result.planner.modelName).toBe('qwen3:8b');
+    expect(result.executor.modelName).toBe('qwen3:4b');
+    expect(result.config).toEqual(DEFAULT_CONFIG);
+    expect(result.tracer).toBeUndefined();
+  });
+
+  it('should use config preset', async () => {
+    const result = await createPlannerExecutorAgentProviders({
+      plannerModel: 'qwen3:8b',
+      executorModel: 'qwen3:4b',
+      config: ConfigPreset.LOCAL_SMALL_MODEL,
+    });
+
+    expect(result.config.plannerMaxTokens).toBe(1024);
+    expect(result.config.verbose).toBe(true);
+  });
+
+  it('should support mixed providers', async () => {
+    const result = await createPlannerExecutorAgentProviders({
+      plannerModel: 'gpt-4o',
+      plannerProvider: 'openai',
+      executorModel: 'qwen3:4b',
+      executorProvider: 'ollama',
+      openaiApiKey: 'test-key',
+    });
+
+    expect(result.planner).toBeInstanceOf(OpenAIProvider);
+    expect(result.executor).toBeInstanceOf(OllamaProvider);
+  });
+});
diff --git a/tests/tracing/tracer-factory.test.ts b/tests/tracing/tracer-factory.test.ts
index 3392300..17f5928 100644
--- a/tests/tracing/tracer-factory.test.ts
+++ b/tests/tracing/tracer-factory.test.ts
@@ -394,3 +394,129 @@ describe('createLocalTracer', () => {
     expect(tracer.getSinkType()).toContain('JsonlTraceSink');
   });
 });
+
+describe('Tracer cleanup functionality', () => {
+  const testTracesDir = path.join(process.cwd(), 'traces');
+
+  beforeEach(() => {
+    // Create traces directory
+    if (!fs.existsSync(testTracesDir)) {
+      fs.mkdirSync(testTracesDir, { recursive: true });
+    }
+  });
+
+  afterEach(() => {
+    // Cleanup traces directory
+    if (fs.existsSync(testTracesDir)) {
+      const files = fs.readdirSync(testTracesDir);
+      files.forEach(file => {
+        const filePath = path.join(testTracesDir, file);
+        if (fs.existsSync(filePath)) {
+          fs.unlinkSync(filePath);
+        }
+      });
+    }
+  });
+
+  describe('isClosed method', () => {
+    it('should return false for new tracer', () => {
+      const tracer = createLocalTracer('test-closed-1');
+      expect(tracer.isClosed()).toBe(false);
+    });
+
+    it('should return true after close is called', async () => {
+      const tracer = createLocalTracer('test-closed-2');
+      expect(tracer.isClosed()).toBe(false);
+
+      await tracer.close();
+
+      expect(tracer.isClosed()).toBe(true);
+    });
+  });
+
+  describe('double-close prevention', () => {
+    it('should prevent double close from causing issues', async () => {
+      const tracer = createLocalTracer('test-double-close');
+      tracer.emitRunStart('TestAgent', 'gpt-4');
+
+      // First close should work
+      await tracer.close();
+      expect(tracer.isClosed()).toBe(true);
+
+      // Second close should be a no-op (no error)
+      await tracer.close();
+      expect(tracer.isClosed()).toBe(true);
+    });
+  });
+
+  describe('_onCloseCallback', () => {
+    it('should call callback when close is invoked', async () => {
+      const tracer = createLocalTracer('test-callback');
+      let callbackInvoked = false;
+      let callbackTracer: any = null;
+
+      tracer._onCloseCallback = (t: any) => {
+        callbackInvoked = true;
+        callbackTracer = t;
+      };
+
+      expect(callbackInvoked).toBe(false);
+
+      await tracer.close();
+
+      expect(callbackInvoked).toBe(true);
+      expect(callbackTracer).toBe(tracer);
+    });
+
+    it('should handle callback errors gracefully', async () => {
+      const tracer = createLocalTracer('test-callback-error');
+
+      tracer._onCloseCallback = () => {
+        throw new Error('Callback error');
+      };
+
+      // Should not throw even when callback throws
+      await expect(tracer.close()).resolves.not.toThrow();
+      expect(tracer.isClosed()).toBe(true);
+    });
+
+    it('should not call callback on second close', async () => {
+      const tracer = createLocalTracer('test-callback-once');
+      let callCount = 0;
+
+      tracer._onCloseCallback = () => {
+        callCount++;
+      };
+
+      await tracer.close();
+      expect(callCount).toBe(1);
+
+      await tracer.close();
+      expect(callCount).toBe(1); // Still 1, not called again
+    });
+  });
+
+  describe('createTracer registers cleanup callback', () => {
+    it('should set _onCloseCallback on created tracer', async () => {
+      const tracer = await createTracer({
+        runId: 'test-cleanup-registration',
+      });
+
+      // Factory should have set up the callback
+      expect(tracer._onCloseCallback).toBeDefined();
+      expect(typeof tracer._onCloseCallback).toBe('function');
+
+      await tracer.close();
+    });
+  });
+
+  describe('createLocalTracer registers cleanup callback', () => {
+    it('should set _onCloseCallback on created tracer', () => {
+      const tracer = createLocalTracer('test-local-cleanup');
+
+      // Factory should have set up the callback
+      expect(tracer._onCloseCallback).toBeDefined();
+      expect(typeof tracer._onCloseCallback).toBe('function');
+    });
+  });
+});