From 9e5df63c2a1451032ac02a32dd890a4d0b3186bf Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Sat, 14 Mar 2026 00:31:58 -0500 Subject: [PATCH] feat(toolkit): align fetchkit with toolkit library contract --- Cargo.lock | 1 + Cargo.toml | 3 + README.md | 15 + crates/fetchkit-cli/src/mcp.rs | 65 +- crates/fetchkit-cli/tests/cli_integration.rs | 8 +- crates/fetchkit-python/src/lib.rs | 2 +- crates/fetchkit/Cargo.toml | 1 + crates/fetchkit/src/error.rs | 24 + crates/fetchkit/src/lib.rs | 140 +-- crates/fetchkit/src/tool.rs | 843 ++++++++++++++++--- crates/fetchkit/tests/integration.rs | 60 ++ 11 files changed, 885 insertions(+), 277 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b057e31..2a1d824 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -423,6 +423,7 @@ dependencies = [ "thiserror 2.0.18", "tokio", "tokio-test", + "tower", "tracing", "tracing-subscriber", "url", diff --git a/Cargo.toml b/Cargo.toml index 0620dda..f1ac0f3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,3 +48,6 @@ tempfile = "3" # Async traits async-trait = "0.1" + +# Service trait +tower = { version = "0.5", features = ["util"] } diff --git a/README.md b/README.md index 3d8ac2e..a45f2b6 100644 --- a/README.md +++ b/README.md @@ -135,6 +135,21 @@ let request = FetchRequest::new("https://example.com"); let response = tool.execute(request).await.unwrap(); ``` +### Toolkit Contract Surface + +```rust +use fetchkit::ToolBuilder; + +let builder = ToolBuilder::new().enable_save_to_file(true); +let tool = builder.build(); + +assert_eq!(tool.name(), "web_fetch"); +assert_eq!(tool.display_name(), "Web Fetch"); + +let definition = builder.build_tool_definition(); +let mut service = builder.build_service(); +``` + ### Hardened Tool Profile ```rust diff --git a/crates/fetchkit-cli/src/mcp.rs b/crates/fetchkit-cli/src/mcp.rs index 79f6441..18f520d 100644 --- a/crates/fetchkit-cli/src/mcp.rs +++ b/crates/fetchkit-cli/src/mcp.rs @@ -1,6 +1,6 @@ //! 
MCP (Model Context Protocol) server implementation -use fetchkit::{FetchRequest, Tool}; +use fetchkit::Tool; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use std::io::{self, BufRead, Write}; @@ -105,23 +105,14 @@ impl McpServer { } fn handle_tools_list(&self, id: Option) -> JsonRpcResponse { - let input_schema = json!({ - "type": "object", - "properties": { - "url": { - "type": "string", - "description": "The URL to fetch (required, must be http:// or https://)" - } - }, - "required": ["url"] - }); + let input_schema = self.tool.input_schema(); JsonRpcResponse::success( id, json!({ "tools": [{ - "name": "fetchkit", - "description": "Fetch URL and return markdown with metadata frontmatter. Optimized for LLM consumption.", + "name": self.tool.name(), + "description": self.tool.description(), "inputSchema": input_schema }] }), @@ -134,30 +125,48 @@ impl McpServer { .and_then(|v| v.as_str()) .unwrap_or_default(); - if tool_name != "fetchkit" { + if tool_name != self.tool.name() { return JsonRpcResponse::error(id, -32602, format!("Unknown tool: {}", tool_name)); } - self.handle_fetchkit_call(id, params).await + self.handle_tool_call(id, params).await } - async fn handle_fetchkit_call(&self, id: Option, params: Value) -> JsonRpcResponse { - let arguments = params.get("arguments").cloned().unwrap_or(json!({})); + async fn handle_tool_call(&self, id: Option, params: Value) -> JsonRpcResponse { + let mut arguments = params.get("arguments").cloned().unwrap_or(json!({})); + if let Some(object) = arguments.as_object_mut() { + let wants_head = object + .get("method") + .and_then(|value| value.as_str()) + .is_some_and(|method| method.eq_ignore_ascii_case("HEAD")); + let has_output_mode = object.contains_key("as_markdown") + || object.contains_key("as_text") + || object.contains_key("save_to_file"); - // Extract URL from arguments - let url = match arguments.get("url").and_then(|v| v.as_str()) { - Some(u) => u.to_string(), - None => { - return 
JsonRpcResponse::error(id, -32602, "Missing required argument: url"); + if !wants_head && !has_output_mode { + object.insert("as_markdown".to_string(), json!(true)); } - }; + } - // Build request with markdown conversion - let request = FetchRequest::new(url).as_markdown(); + let execution = match self.tool.execution(arguments) { + Ok(execution) => execution, + Err(err) => { + return JsonRpcResponse::success( + id, + json!({ + "content": [{ + "type": "text", + "text": format!("Error: {}", err) + }], + "isError": true + }), + ); + } + }; - // Execute tool - match self.tool.execute(request).await { - Ok(response) => { + match execution.execute().await { + Ok(output) => { + let response = serde_json::from_value(output.result).unwrap_or_default(); let output = format_md_with_frontmatter(&response); JsonRpcResponse::success( id, diff --git a/crates/fetchkit-cli/tests/cli_integration.rs b/crates/fetchkit-cli/tests/cli_integration.rs index 469b464..136bde1 100644 --- a/crates/fetchkit-cli/tests/cli_integration.rs +++ b/crates/fetchkit-cli/tests/cli_integration.rs @@ -111,9 +111,9 @@ fn test_llmtxt_flag() { let stdout = String::from_utf8_lossy(&output.stdout); assert!(output.status.success()); - assert!(stdout.contains("FetchKit Tool")); - assert!(stdout.contains("Capabilities")); - assert!(stdout.contains("Input Parameters")); + assert!(stdout.contains("# Web Fetch")); + assert!(stdout.contains("## Parameters")); + assert!(stdout.contains("**Name:** `web_fetch`")); } // ============================================================================ @@ -265,7 +265,7 @@ fn test_mcp_initialize() { assert_eq!(list_resp["id"], 2); let tools = list_resp["result"]["tools"].as_array().unwrap(); assert_eq!(tools.len(), 1); - assert_eq!(tools[0]["name"], "fetchkit"); + assert_eq!(tools[0]["name"], "web_fetch"); assert!(tools[0]["inputSchema"].is_object()); } diff --git a/crates/fetchkit-python/src/lib.rs b/crates/fetchkit-python/src/lib.rs index 8ceceac..b71e06f 100644 --- 
a/crates/fetchkit-python/src/lib.rs +++ b/crates/fetchkit-python/src/lib.rs @@ -266,7 +266,7 @@ impl PyFetchKitTool { /// Get tool description fn description(&self) -> String { - self.inner.description() + self.inner.description().to_string() } /// Get system prompt diff --git a/crates/fetchkit/Cargo.toml b/crates/fetchkit/Cargo.toml index eeb5008..6d3eccd 100644 --- a/crates/fetchkit/Cargo.toml +++ b/crates/fetchkit/Cargo.toml @@ -22,6 +22,7 @@ thiserror = { workspace = true } futures = { workspace = true } bytes = { workspace = true } async-trait = { workspace = true } +tower = { workspace = true } [dev-dependencies] wiremock = { workspace = true } diff --git a/crates/fetchkit/src/error.rs b/crates/fetchkit/src/error.rs index 05e5a7b..adba5e2 100644 --- a/crates/fetchkit/src/error.rs +++ b/crates/fetchkit/src/error.rs @@ -80,6 +80,24 @@ impl FetchError { } } +/// Errors returned by the toolkit-library contract surface. +#[derive(Debug, Error)] +pub enum ToolError { + /// Safe to surface to an LLM or end user. + #[error("{0}")] + UserFacing(String), + /// Internal/operator-facing failure details. + #[error("{0}")] + Internal(String), +} + +impl ToolError { + /// Whether this error is safe to show to the LLM. 
+ pub fn is_user_facing(&self) -> bool { + matches!(self, Self::UserFacing(_)) + } +} + #[cfg(test)] mod tests { use super::*; @@ -107,4 +125,10 @@ mod tests { "Request timed out: server did not respond within 1 second" ); } + + #[test] + fn test_tool_error_classification() { + assert!(ToolError::UserFacing("url is required".to_string()).is_user_facing()); + assert!(!ToolError::Internal("serde failure".to_string()).is_user_facing()); + } } diff --git a/crates/fetchkit/src/lib.rs b/crates/fetchkit/src/lib.rs index e148446..0725ba8 100644 --- a/crates/fetchkit/src/lib.rs +++ b/crates/fetchkit/src/lib.rs @@ -73,142 +73,22 @@ mod types; pub use client::{fetch, fetch_with_options, FetchOptions}; pub use convert::{html_to_markdown, html_to_text}; pub use dns::DnsPolicy; -pub use error::FetchError; +pub use error::{FetchError, ToolError}; pub use fetchers::{DefaultFetcher, Fetcher, FetcherRegistry, GitHubRepoFetcher}; pub use file_saver::{FileSaveError, FileSaver, LocalFileSaver, SaveResult}; -pub use tool::{Tool, ToolBuilder, ToolStatus}; +pub use tool::{ + Tool, ToolBuilder, ToolExecution, ToolImage, ToolOutput, ToolOutputMetadata, ToolService, + ToolStatus, +}; pub use types::{FetchRequest, FetchResponse, HttpMethod}; /// Default User-Agent string pub const DEFAULT_USER_AGENT: &str = "Everruns FetchKit/1.0"; -// -- Tool description fragments (composed dynamically by Tool methods) -- +/// Backward-compatible full description string with file-saving enabled. +pub const TOOL_DESCRIPTION: &str = + "Fetch URL content as text or markdown; return metadata for binary responses or save bytes to file."; -/// Base tool description (always included) -pub(crate) const TOOL_DESCRIPTION_BASE: &str = "\ -Fetches content from a URL and optionally converts HTML to markdown or text. 
- -- Supports GET and HEAD methods -- Converts HTML to markdown or plain text -- Returns metadata for binary content -- Strict timeouts for reliability"; - -/// Save-to-file line appended to description when enabled -pub(crate) const TOOL_DESCRIPTION_SAVE: &str = "\n- File download (save_to_file)"; - -// -- TOOL_LLMTXT fragments -- - -pub(crate) const TOOL_LLMTXT_HEADER: &str = "\ -# FetchKit Tool - -Fetches content from a URL and optionally converts HTML to markdown or text. - -## Capabilities -- HTTP GET and HEAD requests -- HTML to Markdown conversion -- HTML to plain text conversion -- Binary content detection (returns metadata only) -- Automatic timeout handling - -## Input Parameters -- `url` (required): The URL to fetch (must be http:// or https://) -- `method` (optional): GET or HEAD (default: GET) -- `as_markdown` (optional): Convert HTML to markdown -- `as_text` (optional): Convert HTML to plain text"; - -pub(crate) const TOOL_LLMTXT_SAVE_INPUT: &str = "\ -\n- `save_to_file` (optional): Save response body to this path instead of returning inline content. \ -Accepts binary content (images, PDFs, archives). 
Requires file saving to be enabled."; - -pub(crate) const TOOL_LLMTXT_OUTPUT_BASE: &str = " - -## Output Fields -- `url`: The fetched URL -- `status_code`: HTTP status code -- `content_type`: Content-Type header value -- `size`: Content size in bytes -- `last_modified`: Last-Modified header value -- `filename`: Extracted filename -- `format`: \"markdown\", \"text\", or \"raw\" -- `content`: The fetched/converted content -- `truncated`: True if content was truncated due to timeout -- `method`: \"HEAD\" for HEAD requests -- `error`: Error message for binary content"; - -pub(crate) const TOOL_LLMTXT_SAVE_OUTPUT: &str = "\ -\n- `saved_path`: Path where file was saved (when save_to_file was used) -- `bytes_written`: Bytes written to file (when save_to_file was used)"; - -pub(crate) const TOOL_LLMTXT_EXAMPLES_BASE: &str = " - -## Examples - -### Fetch a webpage as markdown -```json -{\"url\": \"https://example.com\", \"as_markdown\": true} -``` - -### Check if a URL exists (HEAD request) -```json -{\"url\": \"https://example.com/file.pdf\", \"method\": \"HEAD\"} -``` - -### Fetch raw content -```json -{\"url\": \"https://api.example.com/data.json\"} -```"; - -pub(crate) const TOOL_LLMTXT_SAVE_EXAMPLE: &str = " - -### Download a file -```json -{\"url\": \"https://example.com/image.png\", \"save_to_file\": \"image.png\"} -```"; - -pub(crate) const TOOL_LLMTXT_ERRORS_BASE: &str = " - -## Error Handling -- Invalid URLs return an error -- Binary content returns metadata with error message -- Timeouts return partial content with truncated flag"; - -pub(crate) const TOOL_LLMTXT_SAVE_ERRORS: &str = "\ -\n- Binary content is accepted when using save_to_file\n\ -- File saving errors include path validation and IO failures"; - -/// Compose full TOOL_DESCRIPTION with all features (for backward compat / CLI) -pub const TOOL_DESCRIPTION: &str = "\ -Fetches content from a URL and optionally converts HTML to markdown or text.\n\ -\n\ -- Supports GET and HEAD methods\n\ -- Converts HTML 
to markdown or plain text\n\ -- Returns metadata for binary content\n\ -- Strict timeouts for reliability\n\ -- File download (save_to_file)"; - -/// Compose full TOOL_LLMTXT with all features (for backward compat / CLI) +/// Backward-compatible help document with file-saving enabled. pub static TOOL_LLMTXT: std::sync::LazyLock = - std::sync::LazyLock::new(|| build_llmtxt(true)); - -/// Build llmtxt string with optional save_to_file sections -pub(crate) fn build_llmtxt(include_save: bool) -> String { - let mut s = String::with_capacity(2048); - s.push_str(TOOL_LLMTXT_HEADER); - if include_save { - s.push_str(TOOL_LLMTXT_SAVE_INPUT); - } - s.push_str(TOOL_LLMTXT_OUTPUT_BASE); - if include_save { - s.push_str(TOOL_LLMTXT_SAVE_OUTPUT); - } - s.push_str(TOOL_LLMTXT_EXAMPLES_BASE); - if include_save { - s.push_str(TOOL_LLMTXT_SAVE_EXAMPLE); - } - s.push_str(TOOL_LLMTXT_ERRORS_BASE); - if include_save { - s.push_str(TOOL_LLMTXT_SAVE_ERRORS); - } - s.push('\n'); - s -} + std::sync::LazyLock::new(|| Tool::builder().enable_save_to_file(true).build().help()); diff --git a/crates/fetchkit/src/tool.rs b/crates/fetchkit/src/tool.rs index 548d5ef..078902f 100644 --- a/crates/fetchkit/src/tool.rs +++ b/crates/fetchkit/src/tool.rs @@ -1,14 +1,23 @@ -//! Tool builder and contract for FetchKit +//! Tool builder and toolkit-library contract for FetchKit. +// +// DECISION: keep the legacy typed `execute`/`llmtxt` surface as wrappers around the +// toolkit-library contract so existing fetchkit callers can migrate incrementally. 
use crate::client::{fetch_with_options, FetchOptions}; use crate::dns::DnsPolicy; -use crate::error::FetchError; +use crate::error::{FetchError, ToolError}; use crate::fetchers::FetcherRegistry; use crate::file_saver::FileSaver; use crate::types::{FetchRequest, FetchResponse}; -use crate::{build_llmtxt, TOOL_DESCRIPTION_BASE, TOOL_DESCRIPTION_SAVE}; -use schemars::schema_for; +use futures::future::BoxFuture; use serde::{Deserialize, Serialize}; +use serde_json::{json, Map, Value}; +use std::task::{Context, Poll}; +use std::time::Instant; +use tower::Service; + +const DEFAULT_LOCALE: &str = "en-US"; +const TOOL_NAME: &str = "web_fetch"; /// Status update during tool execution #[derive(Debug, Clone, Serialize, Deserialize)] @@ -56,6 +65,37 @@ impl ToolStatus { } } +/// Output image returned by the toolkit-library contract. +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct ToolImage { + pub base64: String, + pub media_type: String, +} + +/// Consumer-only metadata returned by the toolkit-library contract. +#[derive(Debug, Clone)] +pub struct ToolOutputMetadata { + pub duration: std::time::Duration, + pub extra: Value, +} + +impl Default for ToolOutputMetadata { + fn default() -> Self { + Self { + duration: std::time::Duration::default(), + extra: json!({}), + } + } +} + +/// Structured tool output for the toolkit-library contract. 
+#[derive(Debug, Clone, Default)] +pub struct ToolOutput { + pub result: Value, + pub images: Vec, + pub metadata: ToolOutputMetadata, +} + /// Builder for configuring the FetchKit tool /// /// # Examples @@ -71,10 +111,11 @@ impl ToolStatus { /// .block_prefix("https://internal.example.com") /// .build(); /// -/// assert!(!tool.description().is_empty()); +/// assert_eq!(tool.name(), "web_fetch"); /// ``` #[derive(Debug, Clone, Default)] pub struct ToolBuilder { + locale: String, /// Enable as_markdown option enable_markdown: bool, /// Enable as_text option @@ -105,12 +146,19 @@ impl ToolBuilder { /// Create a new tool builder with all options enabled pub fn new() -> Self { Self { + locale: DEFAULT_LOCALE.to_string(), enable_markdown: true, enable_text: true, ..Default::default() } } + /// Set the locale for user-facing metadata and tool errors. + pub fn locale(mut self, locale: &str) -> Self { + self.locale = normalize_locale(locale); + self + } + /// Enable as_markdown option pub fn enable_markdown(mut self, enable: bool) -> Self { self.enable_markdown = enable; @@ -244,57 +292,76 @@ impl ToolBuilder { self } - /// Build the tool - pub fn build(self) -> Tool { + /// Build the full tool metadata + execution factory. 
+ pub fn build(&self) -> Tool { Tool { + locale: self.locale.clone(), + display_name: display_name(&self.locale).to_string(), + description: description(&self.locale, self.enable_save_to_file), + version: env!("CARGO_PKG_VERSION").to_string(), enable_markdown: self.enable_markdown, enable_text: self.enable_text, - user_agent: self.user_agent, - allow_prefixes: self.allow_prefixes, - block_prefixes: self.block_prefixes, - dns_policy: self.dns_policy, + user_agent: self.user_agent.clone(), + allow_prefixes: self.allow_prefixes.clone(), + block_prefixes: self.block_prefixes.clone(), + dns_policy: self.dns_policy.clone(), max_body_size: self.max_body_size, enable_save_to_file: self.enable_save_to_file, respect_proxy_env: self.respect_proxy_env, - allowed_ports: self.allowed_ports, - blocked_hosts: self.blocked_hosts, + allowed_ports: self.allowed_ports.clone(), + blocked_hosts: self.blocked_hosts.clone(), same_host_redirects_only: self.same_host_redirects_only, } } + + /// Build a `tower::Service` that accepts JSON args and returns JSON result. + pub fn build_service(&self) -> ToolService { + ToolService { tool: self.build() } + } + + /// Alias for `build_service()` for generic executor-oriented consumers. + pub fn build_executor(&self) -> ToolService { + self.build_service() + } + + /// Build an OpenAI-compatible function tool definition. + pub fn build_tool_definition(&self) -> Value { + let tool = self.build(); + json!({ + "type": "function", + "function": { + "name": tool.name(), + "description": tool.description(), + "parameters": tool.input_schema() + } + }) + } + + /// Build the JSON Schema for the tool's input parameters. + pub fn build_input_schema(&self) -> Value { + build_input_schema( + self.enable_markdown, + self.enable_text, + self.enable_save_to_file, + ) + } + + /// Build the JSON Schema for the tool's output. + pub fn build_output_schema(&self) -> Value { + build_output_schema() + } } /// Configured FetchKit tool /// /// Created via [`ToolBuilder`]. 
Provides methods for executing fetch requests, /// retrieving schemas, and accessing tool metadata. -/// -/// # Examples -/// -/// ```no_run -/// use fetchkit::{FetchRequest, Tool}; -/// -/// # async fn example() -> Result<(), fetchkit::FetchError> { -/// let tool = Tool::default(); -/// let response = tool.execute(FetchRequest::new("https://example.com")).await?; -/// println!("Status: {}", response.status_code); -/// # Ok(()) -/// # } -/// ``` -/// -/// Tool metadata is available without making any requests: -/// -/// ``` -/// use fetchkit::Tool; -/// -/// let tool = Tool::default(); -/// assert!(!tool.description().is_empty()); -/// assert!(!tool.llmtxt().is_empty()); -/// -/// let schema = tool.input_schema(); -/// assert!(schema["properties"]["url"].is_object()); -/// ``` #[derive(Debug, Clone)] pub struct Tool { + locale: String, + display_name: String, + description: String, + version: String, enable_markdown: bool, enable_text: bool, user_agent: Option, @@ -321,58 +388,83 @@ impl Tool { ToolBuilder::new() } - /// Get tool description, reflecting enabled features - pub fn description(&self) -> String { - let mut s = TOOL_DESCRIPTION_BASE.to_string(); - if self.enable_save_to_file { - s.push_str(TOOL_DESCRIPTION_SAVE); - } - s + /// Tool name for LLM invocation. + pub fn name(&self) -> &str { + TOOL_NAME + } + + /// Human-readable display name. + pub fn display_name(&self) -> &str { + &self.display_name + } + + /// Toolkit version. + pub fn version(&self) -> &str { + &self.version } - /// Get system prompt (empty for this tool) + /// Tool description, reflecting enabled features. + pub fn description(&self) -> &str { + &self.description + } + + /// Tool locale. + pub fn locale(&self) -> &str { + &self.locale + } + + /// Get system prompt contribution. 
pub fn system_prompt(&self) -> String { - String::new() + system_prompt( + &self.locale, + self.enable_save_to_file, + self.dns_policy.block_private, + ) } - /// Get full documentation (llmtxt), reflecting enabled features - pub fn llmtxt(&self) -> String { - build_llmtxt(self.enable_save_to_file) + /// Get comprehensive Markdown help. + pub fn help(&self) -> String { + build_help(self) } - /// Get input schema as JSON - pub fn input_schema(&self) -> serde_json::Value { - let schema = schema_for!(FetchRequest); - let mut value = serde_json::to_value(schema).unwrap_or_default(); + /// Backward-compatible alias for `help()`. + pub fn llmtxt(&self) -> String { + self.help() + } - // Remove disabled options from schema - if let Some(props) = value.get_mut("properties").and_then(|p| p.as_object_mut()) { - if !self.enable_markdown { - props.remove("as_markdown"); - } - if !self.enable_text { - props.remove("as_text"); - } - if !self.enable_save_to_file { - props.remove("save_to_file"); - } - } + /// Get input schema as JSON. + pub fn input_schema(&self) -> Value { + build_input_schema( + self.enable_markdown, + self.enable_text, + self.enable_save_to_file, + ) + } - value + /// Get output schema as JSON. + pub fn output_schema(&self) -> Value { + build_output_schema() } - /// Get output schema as JSON - pub fn output_schema(&self) -> serde_json::Value { - let schema = schema_for!(FetchResponse); - serde_json::to_value(schema).unwrap_or_default() + /// Create a single-use tool execution from JSON arguments. + pub fn execution(&self, args: Value) -> Result { + validate_args(self, &args)?; + let request: FetchRequest = serde_json::from_value(args) + .map_err(|err| invalid_arguments_error(self.locale(), &err.to_string()))?; + validate_request(self, &request)?; + + Ok(ToolExecution { + tool: self.clone(), + request, + }) } - /// Execute the tool with the given request + /// Execute the tool with the given typed request. 
pub async fn execute(&self, req: FetchRequest) -> Result { fetch_with_options(req, self.build_options()).await } - /// Execute the tool with status updates + /// Execute the tool with status updates. pub async fn execute_with_status( &self, req: FetchRequest, @@ -383,7 +475,6 @@ impl Tool { { status_callback(ToolStatus::new("validate").with_percent(0.0)); - // Validate request if req.url.is_empty() { return Err(FetchError::MissingUrl); } @@ -393,7 +484,6 @@ impl Tool { } status_callback(ToolStatus::new("connect").with_percent(10.0)); - status_callback(ToolStatus::new("fetch").with_percent(20.0)); let result = fetch_with_options(req, self.build_options()).await; @@ -403,24 +493,6 @@ impl Tool { result } - /// Build FetchOptions from this Tool's configuration - fn build_options(&self) -> FetchOptions { - FetchOptions { - user_agent: self.user_agent.clone(), - allow_prefixes: self.allow_prefixes.clone(), - block_prefixes: self.block_prefixes.clone(), - enable_markdown: self.enable_markdown, - enable_text: self.enable_text, - dns_policy: self.dns_policy.clone(), - max_body_size: self.max_body_size, - enable_save_to_file: self.enable_save_to_file, - respect_proxy_env: self.respect_proxy_env, - allowed_ports: self.allowed_ports.clone(), - blocked_hosts: self.blocked_hosts.clone(), - same_host_redirects_only: self.same_host_redirects_only, - } - } - /// Execute fetch with optional file saving. 
/// /// When `req.save_to_file` is set, validates the path via the saver, @@ -440,7 +512,6 @@ impl Tool { let saver = saver.ok_or(FetchError::SaverNotAvailable)?; - // Validate path before making HTTP request saver .validate_path(path) .await @@ -453,15 +524,518 @@ impl Tool { self.execute(req).await } } + + fn build_options(&self) -> FetchOptions { + FetchOptions { + user_agent: self.user_agent.clone(), + allow_prefixes: self.allow_prefixes.clone(), + block_prefixes: self.block_prefixes.clone(), + enable_markdown: self.enable_markdown, + enable_text: self.enable_text, + dns_policy: self.dns_policy.clone(), + max_body_size: self.max_body_size, + enable_save_to_file: self.enable_save_to_file, + respect_proxy_env: self.respect_proxy_env, + allowed_ports: self.allowed_ports.clone(), + blocked_hosts: self.blocked_hosts.clone(), + same_host_redirects_only: self.same_host_redirects_only, + } + } +} + +/// Single-use runtime execution for one tool call. +#[derive(Debug, Clone)] +pub struct ToolExecution { + tool: Tool, + request: FetchRequest, +} + +impl ToolExecution { + /// Run to completion without adapters. + pub async fn execute(self) -> Result { + self.execute_inner(None).await + } + + /// Run to completion with an injected file saver adapter. + pub async fn execute_with(self, saver: &A) -> Result + where + A: FileSaver, + { + self.execute_inner(Some(saver)).await + } + + async fn execute_inner(self, saver: Option<&dyn FileSaver>) -> Result { + let ToolExecution { tool, request } = self; + let started_at = Instant::now(); + let response = tool + .execute_with_saver(request, saver) + .await + .map_err(|err| map_fetch_error(&tool.locale, err))?; + + build_tool_output(response, started_at) + } +} + +/// Generic JSON args → JSON result service. 
+#[derive(Debug, Clone)] +pub struct ToolService { + tool: Tool, +} + +impl Service for ToolService { + type Response = Value; + type Error = ToolError; + type Future = BoxFuture<'static, Result>; + + fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } + + fn call(&mut self, req: Value) -> Self::Future { + let tool = self.tool.clone(); + Box::pin(async move { + let output = tool.execution(req)?.execute().await?; + Ok(output.result) + }) + } +} + +fn build_tool_output( + response: FetchResponse, + started_at: Instant, +) -> Result { + let result = serde_json::to_value(&response) + .map_err(|err| ToolError::Internal(format!("failed to serialize tool output: {err}")))?; + + Ok(ToolOutput { + result, + images: Vec::new(), + metadata: ToolOutputMetadata { + duration: started_at.elapsed(), + extra: json!({ + "http_status": response.status_code, + "content_type": response.content_type, + "content_length": response.size, + "format": response.format, + "truncated": response.truncated.unwrap_or(false), + "saved_path": response.saved_path, + "bytes_written": response.bytes_written, + }), + }, + }) +} + +fn validate_args(tool: &Tool, args: &Value) -> Result<(), ToolError> { + let object = args + .as_object() + .ok_or_else(|| invalid_arguments_error(tool.locale(), "arguments must be a JSON object"))?; + + for key in object.keys() { + let allowed = match key.as_str() { + "url" | "method" => true, + "as_markdown" => tool.enable_markdown, + "as_text" => tool.enable_text, + "save_to_file" => tool.enable_save_to_file, + _ => false, + }; + + if !allowed { + return Err(unknown_parameter_error(tool.locale(), key)); + } + } + + Ok(()) +} + +fn validate_request(tool: &Tool, request: &FetchRequest) -> Result<(), ToolError> { + if request.url.is_empty() { + return Err(map_fetch_error(tool.locale(), FetchError::MissingUrl)); + } + + if !request.url.starts_with("http://") && !request.url.starts_with("https://") { + return Err(map_fetch_error(tool.locale(), 
FetchError::InvalidUrlScheme)); + } + + Ok(()) +} + +fn build_input_schema( + enable_markdown: bool, + enable_text: bool, + enable_save_to_file: bool, +) -> Value { + let mut properties = Map::new(); + properties.insert( + "url".to_string(), + json!({"type": "string", "format": "uri"}), + ); + properties.insert( + "method".to_string(), + json!({"type": "string", "enum": ["GET", "HEAD"], "default": "GET"}), + ); + + if enable_markdown { + properties.insert( + "as_markdown".to_string(), + json!({"type": "boolean", "default": false}), + ); + } + + if enable_text { + properties.insert( + "as_text".to_string(), + json!({"type": "boolean", "default": false}), + ); + } + + if enable_save_to_file { + properties.insert( + "save_to_file".to_string(), + json!({ + "type": "string", + "description": "Adapter-defined destination path" + }), + ); + } + + json!({ + "type": "object", + "properties": properties, + "required": ["url"], + "additionalProperties": false, + }) +} + +fn build_output_schema() -> Value { + json!({ + "type": "object", + "properties": { + "url": {"type": "string"}, + "status_code": {"type": "integer", "minimum": 100, "maximum": 599}, + "content_type": {"type": "string"}, + "size": {"type": "integer", "minimum": 0}, + "last_modified": {"type": "string"}, + "filename": {"type": "string"}, + "format": {"type": "string", "enum": ["markdown", "text", "raw", "github_repo"]}, + "content": {"type": "string"}, + "truncated": {"type": "boolean"}, + "method": {"type": "string", "enum": ["HEAD"]}, + "error": {"type": "string"}, + "saved_path": {"type": "string"}, + "bytes_written": {"type": "integer", "minimum": 0} + }, + "required": ["url", "status_code"], + "additionalProperties": false + }) +} + +fn normalize_locale(locale: &str) -> String { + let locale = locale.trim(); + if locale.is_empty() { + DEFAULT_LOCALE.to_string() + } else { + locale.to_string() + } +} + +fn is_ukrainian(locale: &str) -> bool { + locale.to_ascii_lowercase().starts_with("uk") +} + +fn 
display_name(locale: &str) -> &'static str { + if is_ukrainian(locale) { + "Веб-завантаження" + } else { + "Web Fetch" + } +} + +fn description(locale: &str, enable_save_to_file: bool) -> String { + if is_ukrainian(locale) { + if enable_save_to_file { + "Завантажити URL як текст або markdown; повернути метадані для бінарного вмісту або зберегти байти через save_to_file.".to_string() + } else { + "Завантажити URL як текст або markdown; повернути метадані для бінарного вмісту." + .to_string() + } + } else if enable_save_to_file { + "Fetch URL content as text or markdown; return metadata for binary responses or save bytes with save_to_file.".to_string() + } else { + "Fetch URL content as text or markdown; return metadata for binary responses.".to_string() + } +} + +fn system_prompt(locale: &str, enable_save_to_file: bool, block_private_ips: bool) -> String { + if is_ukrainian(locale) { + let binary_rule = if enable_save_to_file { + "Бінарні відповіді повертають метадані; використовуйте save_to_file, щоб зберегти байти." + } else { + "Бінарні відповіді повертають лише метадані." + }; + let network_rule = if block_private_ips { + "Приватні IP-адреси заблоковані." + } else { + "Блокування приватних IP-адрес вимкнене." + }; + format!( + "{}: повертає truncated=true для часткових відповідей після таймауту. {} {}", + TOOL_NAME, binary_rule, network_rule + ) + } else { + let binary_rule = if enable_save_to_file { + "Binary responses return metadata; use save_to_file to persist bytes." + } else { + "Binary responses return metadata only." + }; + let network_rule = if block_private_ips { + "Private IPs are blocked." + } else { + "Private IP blocking is disabled." + }; + format!( + "{}: returns truncated=true for partial responses after timeout. 
{} {}", + TOOL_NAME, binary_rule, network_rule + ) + } +} + +fn build_help(tool: &Tool) -> String { + let (parameters_heading, examples_heading, adapters_heading, errors_heading, locale_label) = + if is_ukrainian(tool.locale()) { + ("Параметри", "Приклади", "Адаптери", "Помилки", "Локаль") + } else { + ("Parameters", "Examples", "Adapters", "Errors", "Locale") + }; + + let mut rows = vec![ + table_row( + "url", + "string", + "yes", + "—", + parameter_description(tool.locale(), "url"), + ), + table_row( + "method", + "string", + "no", + "\"GET\"", + parameter_description(tool.locale(), "method"), + ), + ]; + + if tool.enable_markdown { + rows.push(table_row( + "as_markdown", + "boolean", + "no", + "false", + parameter_description(tool.locale(), "as_markdown"), + )); + } + + if tool.enable_text { + rows.push(table_row( + "as_text", + "boolean", + "no", + "false", + parameter_description(tool.locale(), "as_text"), + )); + } + + if tool.enable_save_to_file { + rows.push(table_row( + "save_to_file", + "string", + "no", + "—", + parameter_description(tool.locale(), "save_to_file"), + )); + } + + let adapters = if tool.enable_save_to_file { + if is_ukrainian(tool.locale()) { + "- `FileSaver` (необов’язковий): потрібен, коли задано `save_to_file`.\n" + } else { + "- `FileSaver` (optional): required when `save_to_file` is set.\n" + } + } else if is_ukrainian(tool.locale()) { + "- Збереження файлів вимкнене в цій конфігурації.\n" + } else { + "- File saving is disabled in this tool build.\n" + }; + + let errors = if is_ukrainian(tool.locale()) { + if tool.enable_save_to_file { + "- `MissingUrl` — параметр `url` обов’язковий\n\ + - `InvalidUrlScheme` — схема URL має бути `http` або `https`\n\ + - `BlockedUrl` — URL заблокований політикою SSRF або allow/block правилами\n\ + - `FirstByteTimeout` — сервер не відповів протягом 1 секунди\n\ + - `SaverNotAvailable` — `save_to_file` потребує адаптер `FileSaver`\n" + } else { + "- `MissingUrl` — параметр `url` обов’язковий\n\ + - 
`InvalidUrlScheme` — схема URL має бути `http` або `https`\n\ + - `BlockedUrl` — URL заблокований політикою SSRF або allow/block правилами\n\ + - `FirstByteTimeout` — сервер не відповів протягом 1 секунди\n" + } + } else if tool.enable_save_to_file { + "- `MissingUrl` — `url` is required\n\ + - `InvalidUrlScheme` — URL scheme must be `http` or `https`\n\ + - `BlockedUrl` — URL blocked by SSRF policy or allow/block rules\n\ + - `FirstByteTimeout` — server did not respond within 1 second\n\ + - `SaverNotAvailable` — `save_to_file` requires a `FileSaver` adapter\n" + } else { + "- `MissingUrl` — `url` is required\n\ + - `InvalidUrlScheme` — URL scheme must be `http` or `https`\n\ + - `BlockedUrl` — URL blocked by SSRF policy or allow/block rules\n\ + - `FirstByteTimeout` — server did not respond within 1 second\n" + }; + + let mut help = String::new(); + help.push_str(&format!("# {}\n\n", tool.display_name())); + help.push_str(tool.description()); + help.push_str("\n\n"); + help.push_str(&format!("**Version:** {}\n", tool.version())); + help.push_str(&format!("**Name:** `{}`\n", tool.name())); + help.push_str(&format!("**{}:** `{}`\n\n", locale_label, tool.locale())); + help.push_str(&format!("## {}\n\n", parameters_heading)); + help.push_str("| Name | Type | Required | Default | Description |\n"); + help.push_str("|------|------|----------|---------|-------------|\n"); + for row in rows { + help.push_str(&row); + } + help.push('\n'); + help.push_str(&format!("## {}\n\n", examples_heading)); + help.push_str("```json\n"); + help.push_str("{\"url\": \"https://example.com\", \"as_markdown\": true}\n"); + help.push_str("```\n\n"); + help.push_str("```json\n"); + help.push_str("{\"url\": \"https://example.com/file.pdf\", \"method\": \"HEAD\"}\n"); + help.push_str("```\n"); + if tool.enable_save_to_file { + help.push_str("\n```json\n"); + help.push_str( + "{\"url\": \"https://example.com/image.png\", \"save_to_file\": \"/tmp/image.png\"}\n", + ); + help.push_str("```\n"); + 
} + help.push('\n'); + help.push_str(&format!("## {}\n\n", adapters_heading)); + help.push_str(adapters); + help.push('\n'); + help.push_str(&format!("## {}\n\n", errors_heading)); + help.push_str(errors); + help.push('\n'); + help.push_str("## System Prompt\n\n"); + help.push_str(&tool.system_prompt()); + help.push('\n'); + help +} + +fn parameter_description(locale: &str, field: &str) -> &'static str { + match (is_ukrainian(locale), field) { + (true, "url") => "HTTP або HTTPS URL", + (true, "method") => "`GET` або `HEAD`", + (true, "as_markdown") => "Перетворити HTML у markdown", + (true, "as_text") => "Перетворити HTML у plain text", + (true, "save_to_file") => "Шлях призначення, визначений адаптером", + (false, "url") => "HTTP or HTTPS URL", + (false, "method") => "`GET` or `HEAD`", + (false, "as_markdown") => "Convert HTML to markdown", + (false, "as_text") => "Convert HTML to plain text", + (false, "save_to_file") => "Adapter-defined destination path", + _ => "", + } +} + +fn table_row(name: &str, ty: &str, required: &str, default: &str, description: &str) -> String { + format!("| `{name}` | {ty} | {required} | {default} | {description} |\n") +} + +fn map_fetch_error(locale: &str, err: FetchError) -> ToolError { + match err { + FetchError::ClientBuildError(_) => internal_error("failed to create HTTP client"), + FetchError::MissingUrl => user_error(locale, user_text(locale, "missing_url")), + FetchError::InvalidUrlScheme => user_error(locale, user_text(locale, "invalid_scheme")), + FetchError::InvalidMethod => user_error(locale, user_text(locale, "invalid_method")), + FetchError::BlockedUrl => user_error(locale, user_text(locale, "blocked_url")), + FetchError::FirstByteTimeout => user_error(locale, user_text(locale, "timeout")), + FetchError::ConnectError(_) => user_error(locale, user_text(locale, "connect_error")), + FetchError::RequestError(_) => user_error(locale, user_text(locale, "request_error")), + FetchError::FetcherError(_) => user_error(locale, 
user_text(locale, "fetcher_error")), + FetchError::SaveError(_) => user_error(locale, user_text(locale, "save_error")), + FetchError::SaverNotAvailable => user_error(locale, user_text(locale, "saver_missing")), + } +} + +fn invalid_arguments_error(locale: &str, detail: &str) -> ToolError { + if is_ukrainian(locale) { + user_error(locale, format!("Неприпустимі аргументи: {detail}")) + } else { + user_error(locale, format!("Invalid arguments: {detail}")) + } +} + +fn unknown_parameter_error(locale: &str, key: &str) -> ToolError { + if is_ukrainian(locale) { + user_error(locale, format!("Невідомий параметр: {key}")) + } else { + user_error(locale, format!("Unknown parameter: {key}")) + } +} + +fn user_text(locale: &str, key: &str) -> &'static str { + match (is_ukrainian(locale), key) { + (true, "missing_url") => "Параметр url обов’язковий.", + (true, "invalid_scheme") => "Схема URL має бути http або https.", + (true, "invalid_method") => "Метод має бути GET або HEAD.", + (true, "blocked_url") => "URL заблокований політикою безпеки.", + (true, "timeout") => { + "Сервер не відповів протягом 1 секунди. Спробуйте ще раз або інший URL." + } + (true, "connect_error") => { + "Не вдалося з’єднатися із сервером. Спробуйте ще раз або інший URL." + } + (true, "request_error") => "Запит не вдався. Спробуйте ще раз.", + (true, "fetcher_error") => "Не вдалося обробити відповідь цього URL.", + (true, "save_error") => "Не вдалося зберегти файл. Перевірте шлях призначення.", + (true, "saver_missing") => "save_to_file потребує адаптер FileSaver.", + (false, "missing_url") => "url is required.", + (false, "invalid_scheme") => "URL scheme must be http or https.", + (false, "invalid_method") => "Method must be GET or HEAD.", + (false, "blocked_url") => "URL is blocked by security policy.", + (false, "timeout") => { + "Server did not respond within 1 second. Retry or try a different URL." + } + (false, "connect_error") => "Could not connect to server. 
Retry or try a different URL.", + (false, "request_error") => "Request failed. Retry the tool call.", + (false, "fetcher_error") => "Could not process the response for this URL.", + (false, "save_error") => "Could not save the file. Check the destination path.", + (false, "saver_missing") => "save_to_file requires the FileSaver adapter.", + _ => "Tool execution failed.", + } +} + +fn user_error(locale: &str, message: impl Into<String>) -> ToolError { + let _ = locale; + ToolError::UserFacing(message.into()) +} + +fn internal_error(message: impl Into<String>) -> ToolError { + ToolError::Internal(message.into()) } #[cfg(test)] mod tests { use super::*; + use serde_json::json; + use tower::Service; #[test] fn test_tool_builder() { let tool = Tool::builder() + .locale("uk-UA") .enable_markdown(false) .enable_text(true) .user_agent("TestAgent/1.0") @@ -471,12 +1045,14 @@ mod tests { .respect_proxy_env(true) .build(); + assert_eq!(tool.locale(), "uk-UA"); + assert_eq!(tool.name(), "web_fetch"); + assert_eq!(tool.display_name(), "Веб-завантаження"); assert!(!tool.enable_markdown); assert!(tool.enable_text); assert_eq!(tool.user_agent, Some("TestAgent/1.0".to_string())); assert_eq!(tool.allow_prefixes, vec!["https://allowed.com"]); assert_eq!(tool.block_prefixes, vec!["https://blocked.com"]); - // Safe by default: private IPs blocked assert!(tool.dns_policy.block_private); assert_eq!(tool.max_body_size, Some(1024)); assert!(!tool.enable_save_to_file); @@ -516,19 +1092,20 @@ mod tests { } #[test] - fn test_tool_description() { + fn test_tool_metadata() { let tool = Tool::default(); + assert_eq!(tool.name(), "web_fetch"); + assert_eq!(tool.display_name(), "Web Fetch"); assert!(!tool.description().is_empty()); - assert!(tool.system_prompt().is_empty()); - assert!(!tool.llmtxt().is_empty()); - - // Default tool should NOT mention save_to_file - assert!(!tool.description().contains("save_to_file")); - assert!(!tool.llmtxt().contains("save_to_file")); + 
assert!(tool.system_prompt().starts_with("web_fetch:")); + assert!(tool.help().contains("## Parameters")); + assert_eq!(tool.locale(), "en-US"); + } - // Enabled tool SHOULD mention save_to_file + #[test] + fn test_tool_llmtxt_matches_help() { let tool = Tool::builder().enable_save_to_file(true).build(); - assert!(tool.description().contains("save_to_file")); + assert_eq!(tool.llmtxt(), tool.help()); assert!(tool.llmtxt().contains("save_to_file")); } @@ -538,10 +1115,9 @@ mod tests { let input_schema = tool.input_schema(); let output_schema = tool.output_schema(); - // Input schema should have url property - assert!(input_schema["properties"]["url"].is_object()); - - // Output schema should have url and status_code + assert_eq!(input_schema["type"], "object"); + assert_eq!(input_schema["properties"]["url"]["format"], "uri"); + assert_eq!(input_schema["properties"]["method"]["default"], "GET"); assert!(output_schema["properties"]["url"].is_object()); assert!(output_schema["properties"]["status_code"].is_object()); } @@ -554,12 +1130,44 @@ mod tests { .build(); let schema = tool.input_schema(); + let props = schema + .get("properties") + .and_then(|p| p.as_object()) + .unwrap(); - // Disabled options should be removed from schema - if let Some(props) = schema.get("properties").and_then(|p| p.as_object()) { - assert!(!props.contains_key("as_markdown")); - assert!(!props.contains_key("as_text")); - } + assert!(!props.contains_key("as_markdown")); + assert!(!props.contains_key("as_text")); + } + + #[test] + fn test_tool_definition_uses_contract_metadata() { + let definition = Tool::builder() + .enable_save_to_file(true) + .build_tool_definition(); + assert_eq!(definition["type"], "function"); + assert_eq!(definition["function"]["name"], "web_fetch"); + assert_eq!(definition["function"]["parameters"]["type"], "object"); + } + + #[test] + fn test_execution_rejects_unknown_parameter() { + let err = Tool::default().execution(json!({ + "url": "https://example.com", + 
"bogus": true + })); + + assert!(matches!(&err, Err(ToolError::UserFacing(_)))); + assert!(err.unwrap_err().to_string().contains("Unknown parameter")); + } + + #[test] + fn test_execution_rejects_invalid_url_before_running() { + let err = Tool::default().execution(json!({"url": "ftp://example.com"})); + assert!(matches!(&err, Err(ToolError::UserFacing(_)))); + assert!(err + .unwrap_err() + .to_string() + .contains("URL scheme must be http or https")); } #[test] @@ -574,4 +1182,11 @@ mod tests { assert_eq!(status.percent_complete, Some(50.0)); assert_eq!(status.eta_ms, Some(5000)); } + + #[tokio::test] + async fn test_build_service_propagates_validation_errors() { + let mut service = Tool::builder().build_service(); + let err = service.call(json!(["not-an-object"])).await.unwrap_err(); + assert!(err.is_user_facing()); + } } diff --git a/crates/fetchkit/tests/integration.rs b/crates/fetchkit/tests/integration.rs index 57ca38e..3ad141a 100644 --- a/crates/fetchkit/tests/integration.rs +++ b/crates/fetchkit/tests/integration.rs @@ -4,6 +4,8 @@ use fetchkit::{ fetch_with_options, DnsPolicy, FetchError, FetchOptions, FetchRequest, FetcherRegistry, HttpMethod, LocalFileSaver, Tool, }; +use serde_json::json; +use tower::Service; use wiremock::matchers::{method, path}; use wiremock::{Mock, MockServer, ResponseTemplate}; @@ -918,3 +920,61 @@ async fn test_execute_with_saver_no_save_falls_through() { assert!(resp.saved_path.is_none()); assert!(resp.bytes_written.is_none()); } + +#[tokio::test] +async fn test_tool_execution_returns_contract_output() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with( + ResponseTemplate::new(200) + .set_body_string("

<html><body><h1>Hello</h1></body></html>

") + .insert_header("content-type", "text/html"), + ) + .mount(&mock_server) + .await; + + let tool = test_tool(); + let output = tool + .execution(json!({ + "url": format!("{}/", mock_server.uri()), + "as_markdown": true + })) + .unwrap() + .execute() + .await + .unwrap(); + + assert_eq!(output.result["status_code"], 200); + assert!(output.result["format"].is_string()); + assert!(output.result["content"].as_str().unwrap().contains("Hello")); + assert_eq!(output.metadata.extra["http_status"], 200); + assert!(output.images.is_empty()); +} + +#[tokio::test] +async fn test_tool_service_executes_json_calls() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with( + ResponseTemplate::new(200) + .set_body_string("Hello, Service!") + .insert_header("content-type", "text/plain"), + ) + .mount(&mock_server) + .await; + + let mut service = Tool::builder().block_private_ips(false).build_service(); + let result = service + .call(json!({ + "url": format!("{}/", mock_server.uri()) + })) + .await + .unwrap(); + + assert_eq!(result["status_code"], 200); + assert_eq!(result["content"], "Hello, Service!"); +}