Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,6 @@ tempfile = "3"

# Async traits
async-trait = "0.1"

# Service trait
tower = { version = "0.5", features = ["util"] }
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,21 @@ let request = FetchRequest::new("https://example.com");
let response = tool.execute(request).await.unwrap();
```

### Toolkit Contract Surface

```rust
use fetchkit::ToolBuilder;

let builder = ToolBuilder::new().enable_save_to_file(true);
let tool = builder.build();

assert_eq!(tool.name(), "web_fetch");
assert_eq!(tool.display_name(), "Web Fetch");

let definition = builder.build_tool_definition();
let mut service = builder.build_service();
```

### Hardened Tool Profile

```rust
Expand Down
65 changes: 37 additions & 28 deletions crates/fetchkit-cli/src/mcp.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! MCP (Model Context Protocol) server implementation

use fetchkit::{FetchRequest, Tool};
use fetchkit::Tool;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use std::io::{self, BufRead, Write};
Expand Down Expand Up @@ -105,23 +105,14 @@ impl McpServer {
}

fn handle_tools_list(&self, id: Option<Value>) -> JsonRpcResponse {
let input_schema = json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to fetch (required, must be http:// or https://)"
}
},
"required": ["url"]
});
let input_schema = self.tool.input_schema();

JsonRpcResponse::success(
id,
json!({
"tools": [{
"name": "fetchkit",
"description": "Fetch URL and return markdown with metadata frontmatter. Optimized for LLM consumption.",
"name": self.tool.name(),
"description": self.tool.description(),
"inputSchema": input_schema
}]
}),
Expand All @@ -134,30 +125,48 @@ impl McpServer {
.and_then(|v| v.as_str())
.unwrap_or_default();

if tool_name != "fetchkit" {
if tool_name != self.tool.name() {
return JsonRpcResponse::error(id, -32602, format!("Unknown tool: {}", tool_name));
}

self.handle_fetchkit_call(id, params).await
self.handle_tool_call(id, params).await
}

async fn handle_fetchkit_call(&self, id: Option<Value>, params: Value) -> JsonRpcResponse {
let arguments = params.get("arguments").cloned().unwrap_or(json!({}));
async fn handle_tool_call(&self, id: Option<Value>, params: Value) -> JsonRpcResponse {
let mut arguments = params.get("arguments").cloned().unwrap_or(json!({}));
if let Some(object) = arguments.as_object_mut() {
let wants_head = object
.get("method")
.and_then(|value| value.as_str())
.is_some_and(|method| method.eq_ignore_ascii_case("HEAD"));
let has_output_mode = object.contains_key("as_markdown")
|| object.contains_key("as_text")
|| object.contains_key("save_to_file");

// Extract URL from arguments
let url = match arguments.get("url").and_then(|v| v.as_str()) {
Some(u) => u.to_string(),
None => {
return JsonRpcResponse::error(id, -32602, "Missing required argument: url");
if !wants_head && !has_output_mode {
object.insert("as_markdown".to_string(), json!(true));
}
};
}

// Build request with markdown conversion
let request = FetchRequest::new(url).as_markdown();
let execution = match self.tool.execution(arguments) {
Ok(execution) => execution,
Err(err) => {
return JsonRpcResponse::success(
id,
json!({
"content": [{
"type": "text",
"text": format!("Error: {}", err)
}],
"isError": true
}),
);
}
};

// Execute tool
match self.tool.execute(request).await {
Ok(response) => {
match execution.execute().await {
Ok(output) => {
let response = serde_json::from_value(output.result).unwrap_or_default();
let output = format_md_with_frontmatter(&response);
JsonRpcResponse::success(
id,
Expand Down
8 changes: 4 additions & 4 deletions crates/fetchkit-cli/tests/cli_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,9 @@ fn test_llmtxt_flag() {

let stdout = String::from_utf8_lossy(&output.stdout);
assert!(output.status.success());
assert!(stdout.contains("FetchKit Tool"));
assert!(stdout.contains("Capabilities"));
assert!(stdout.contains("Input Parameters"));
assert!(stdout.contains("# Web Fetch"));
assert!(stdout.contains("## Parameters"));
assert!(stdout.contains("**Name:** `web_fetch`"));
}

// ============================================================================
Expand Down Expand Up @@ -265,7 +265,7 @@ fn test_mcp_initialize() {
assert_eq!(list_resp["id"], 2);
let tools = list_resp["result"]["tools"].as_array().unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0]["name"], "fetchkit");
assert_eq!(tools[0]["name"], "web_fetch");
assert!(tools[0]["inputSchema"].is_object());
}

Expand Down
2 changes: 1 addition & 1 deletion crates/fetchkit-python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ impl PyFetchKitTool {

/// Get tool description
fn description(&self) -> String {
self.inner.description()
self.inner.description().to_string()
}

/// Get system prompt
Expand Down
1 change: 1 addition & 0 deletions crates/fetchkit/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ thiserror = { workspace = true }
futures = { workspace = true }
bytes = { workspace = true }
async-trait = { workspace = true }
tower = { workspace = true }

[dev-dependencies]
wiremock = { workspace = true }
Expand Down
24 changes: 24 additions & 0 deletions crates/fetchkit/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,24 @@ impl FetchError {
}
}

/// Error type exposed by the toolkit-library contract surface.
///
/// Both variants wrap a message string and use the same `"{0}"` display
/// format; they differ only in the audience the message is intended for.
#[derive(Debug, Error)]
pub enum ToolError {
    /// A message that is safe to surface to an LLM or end user.
    #[error("{0}")]
    UserFacing(String),
    /// Failure details intended for operators / internal diagnostics.
    #[error("{0}")]
    Internal(String),
}

impl ToolError {
    /// Returns `true` when this error is safe to show to the LLM, i.e. it is
    /// the [`ToolError::UserFacing`] variant, and `false` otherwise.
    pub fn is_user_facing(&self) -> bool {
        // Spelled out variant by variant so that every case is classified
        // explicitly rather than through a catch-all.
        match self {
            Self::UserFacing(_) => true,
            Self::Internal(_) => false,
        }
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -107,4 +125,10 @@ mod tests {
"Request timed out: server did not respond within 1 second"
);
}

#[test]
fn test_tool_error_classification() {
    // One instance of each variant, classified through the public predicate.
    let user_facing = ToolError::UserFacing("url is required".to_string());
    let internal = ToolError::Internal("serde failure".to_string());

    assert!(user_facing.is_user_facing());
    assert!(!internal.is_user_facing());
}
}
140 changes: 10 additions & 130 deletions crates/fetchkit/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,142 +73,22 @@ mod types;
pub use client::{fetch, fetch_with_options, FetchOptions};
pub use convert::{html_to_markdown, html_to_text};
pub use dns::DnsPolicy;
pub use error::FetchError;
pub use error::{FetchError, ToolError};
pub use fetchers::{DefaultFetcher, Fetcher, FetcherRegistry, GitHubRepoFetcher};
pub use file_saver::{FileSaveError, FileSaver, LocalFileSaver, SaveResult};
pub use tool::{Tool, ToolBuilder, ToolStatus};
pub use tool::{
Tool, ToolBuilder, ToolExecution, ToolImage, ToolOutput, ToolOutputMetadata, ToolService,
ToolStatus,
};
pub use types::{FetchRequest, FetchResponse, HttpMethod};

/// Default User-Agent string
pub const DEFAULT_USER_AGENT: &str = "Everruns FetchKit/1.0";

// -- Tool description fragments (composed dynamically by Tool methods) --
/// Backward-compatible full description string with file-saving enabled.
pub const TOOL_DESCRIPTION: &str =
"Fetch URL content as text or markdown; return metadata for binary responses or save bytes to file.";

/// Base tool description (always included)
pub(crate) const TOOL_DESCRIPTION_BASE: &str = "\
Fetches content from a URL and optionally converts HTML to markdown or text.

- Supports GET and HEAD methods
- Converts HTML to markdown or plain text
- Returns metadata for binary content
- Strict timeouts for reliability";

/// Save-to-file line appended to description when enabled
pub(crate) const TOOL_DESCRIPTION_SAVE: &str = "\n- File download (save_to_file)";

// -- TOOL_LLMTXT fragments --

pub(crate) const TOOL_LLMTXT_HEADER: &str = "\
# FetchKit Tool

Fetches content from a URL and optionally converts HTML to markdown or text.

## Capabilities
- HTTP GET and HEAD requests
- HTML to Markdown conversion
- HTML to plain text conversion
- Binary content detection (returns metadata only)
- Automatic timeout handling

## Input Parameters
- `url` (required): The URL to fetch (must be http:// or https://)
- `method` (optional): GET or HEAD (default: GET)
- `as_markdown` (optional): Convert HTML to markdown
- `as_text` (optional): Convert HTML to plain text";

pub(crate) const TOOL_LLMTXT_SAVE_INPUT: &str = "\
\n- `save_to_file` (optional): Save response body to this path instead of returning inline content. \
Accepts binary content (images, PDFs, archives). Requires file saving to be enabled.";

pub(crate) const TOOL_LLMTXT_OUTPUT_BASE: &str = "

## Output Fields
- `url`: The fetched URL
- `status_code`: HTTP status code
- `content_type`: Content-Type header value
- `size`: Content size in bytes
- `last_modified`: Last-Modified header value
- `filename`: Extracted filename
- `format`: \"markdown\", \"text\", or \"raw\"
- `content`: The fetched/converted content
- `truncated`: True if content was truncated due to timeout
- `method`: \"HEAD\" for HEAD requests
- `error`: Error message for binary content";

pub(crate) const TOOL_LLMTXT_SAVE_OUTPUT: &str = "\
\n- `saved_path`: Path where file was saved (when save_to_file was used)
- `bytes_written`: Bytes written to file (when save_to_file was used)";

pub(crate) const TOOL_LLMTXT_EXAMPLES_BASE: &str = "

## Examples

### Fetch a webpage as markdown
```json
{\"url\": \"https://example.com\", \"as_markdown\": true}
```

### Check if a URL exists (HEAD request)
```json
{\"url\": \"https://example.com/file.pdf\", \"method\": \"HEAD\"}
```

### Fetch raw content
```json
{\"url\": \"https://api.example.com/data.json\"}
```";

pub(crate) const TOOL_LLMTXT_SAVE_EXAMPLE: &str = "

### Download a file
```json
{\"url\": \"https://example.com/image.png\", \"save_to_file\": \"image.png\"}
```";

pub(crate) const TOOL_LLMTXT_ERRORS_BASE: &str = "

## Error Handling
- Invalid URLs return an error
- Binary content returns metadata with error message
- Timeouts return partial content with truncated flag";

pub(crate) const TOOL_LLMTXT_SAVE_ERRORS: &str = "\
\n- Binary content is accepted when using save_to_file\n\
- File saving errors include path validation and IO failures";

/// Compose full TOOL_DESCRIPTION with all features (for backward compat / CLI)
pub const TOOL_DESCRIPTION: &str = "\
Fetches content from a URL and optionally converts HTML to markdown or text.\n\
\n\
- Supports GET and HEAD methods\n\
- Converts HTML to markdown or plain text\n\
- Returns metadata for binary content\n\
- Strict timeouts for reliability\n\
- File download (save_to_file)";

/// Compose full TOOL_LLMTXT with all features (for backward compat / CLI)
/// Backward-compatible help document with file-saving enabled.
pub static TOOL_LLMTXT: std::sync::LazyLock<String> =
std::sync::LazyLock::new(|| build_llmtxt(true));

/// Build llmtxt string with optional save_to_file sections
pub(crate) fn build_llmtxt(include_save: bool) -> String {
let mut s = String::with_capacity(2048);
s.push_str(TOOL_LLMTXT_HEADER);
if include_save {
s.push_str(TOOL_LLMTXT_SAVE_INPUT);
}
s.push_str(TOOL_LLMTXT_OUTPUT_BASE);
if include_save {
s.push_str(TOOL_LLMTXT_SAVE_OUTPUT);
}
s.push_str(TOOL_LLMTXT_EXAMPLES_BASE);
if include_save {
s.push_str(TOOL_LLMTXT_SAVE_EXAMPLE);
}
s.push_str(TOOL_LLMTXT_ERRORS_BASE);
if include_save {
s.push_str(TOOL_LLMTXT_SAVE_ERRORS);
}
s.push('\n');
s
}
std::sync::LazyLock::new(|| Tool::builder().enable_save_to_file(true).build().help());
Loading
Loading