Skip to content

Commit dfd9e90

Browse files
committed
tighten up mcp tool inputs
1 parent f15f6f3 commit dfd9e90

3 files changed

Lines changed: 390 additions & 132 deletions

File tree

src/mcp/server.rs

Lines changed: 101 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -79,52 +79,61 @@ async fn mcp_info() -> impl IntoResponse {
7979
}
8080

8181
/// HTTP handler for the MCP documentation endpoint: wraps the docs JSON in
/// `ApiResponse::success` and returns it with `StatusCode::OK`.
async fn mcp_docs() -> impl IntoResponse {
82-
let payload = ApiResponse::success(json!({
83-
"dsl": {
84-
"semantics": "Structured search payload",
85-
"or_support": "Use structured any_terms:[...] for OR semantics. all_terms are ANDed.",
86-
"wildcards": "Use path/file as glob-like filters. Plain terms are literal term matches.",
87-
"regex": "Use regex field to enable regex content matching.",
88-
"path_search_behavior": "path_search requires a non-empty query and is for fuzzy path matching only.",
89-
"file_list_behavior": "file_list enumerates directories and files with optional recursive depth and limit.",
82+
let payload = ApiResponse::success(mcp_docs_payload());
83+
(StatusCode::OK, Json(payload))
84+
}
85+
86+
/// Builds the static documentation JSON served by `mcp_docs`: a `search_payload`
/// object (field semantics, escaping rules, troubleshooting) and a `cookbook`
/// array of example calls. It is a plain function so the unit tests in this
/// file can inspect the value without going through the HTTP handler.
fn mcp_docs_payload() -> Value {
87+
json!({
88+
"search_payload": {
89+
"semantics": "search accepts structured JSON fields only. Do not send a free-form query string.",
90+
"or_support": "Use any_terms:[...] for OR semantics. all_terms are ANDed.",
91+
"wildcards": "Use path/file as glob-like filters. all_terms and any_terms are literal terms, not wildcard patterns.",
92+
"regex": "Use the regex field for content regex matching. Provide only the pattern string and JSON-escape backslashes.",
93+
"path_search_behavior": "path_search requires a non-empty plain fuzzy query and is for fuzzy path matching only.",
94+
"file_list_behavior": "file_list enumerates directories and files with optional recursive depth and limit. Use path as a directory prefix, not a search query.",
9095
"file_content_behavior": "file_content supports optional start_line/end_line (1-based, inclusive) to return snippets instead of full files.",
9196
"recency_workflow": "For recent or older change questions: repositories -> repo_branches -> search by branch and compare indexed_at or is_live.",
9297
"search_fields": [
93-
"repo: string",
94-
"branch: string",
95-
"lang: string",
96-
"path: glob-like string",
97-
"file: glob-like string",
98-
"regex: string",
98+
"repo: exact repository key from repositories",
99+
"branch: exact branch name from repo_branches",
100+
"lang: language filter",
101+
"path: glob-like path filter",
102+
"file: glob-like filename/path filter",
103+
"regex: regex pattern string (JSON-escaped)",
99104
"case: yes|no|auto",
100105
"historical: boolean",
101106
"all_terms: string[] (AND)",
102107
"any_terms: string[] (OR)"
103108
],
109+
"escaping": [
110+
"Do not prefix field values with repo:, path:, file:, or regex:.",
111+
// Each run of four backslashes in the Rust literal below is two backslash
// characters at runtime, so the example is displayed the way the pattern
// has to be typed inside a JSON string (JSON-encoded form, not the raw regex).
"Regex patterns are JSON strings, so escape backslashes. Example: \"\\\\bQueryParser\\\\(\".",
112+
"If a JSON string contains double quotes, escape them as \\\"."
113+
],
104114
"troubleshooting": [
105-
"No results with repo filter: call repositories and use exact repo key.",
106-
"No branch results: call repo_branches and use exact branch name.",
115+
"No results with repo filter: call repositories and use the exact repo key.",
116+
"No branch results: call repo_branches and use the exact branch name.",
107117
"Need OR behavior: place alternatives in any_terms:[\"termA\",\"termB\"].",
108-
"Need regex matching: use regex:<pattern> instead of wildcard plain terms.",
109-
"Need directory listing: use file_list instead of path_search with empty query."
118+
"Need regex matching: set the regex field instead of using wildcard plain terms.",
119+
"Need directory listing: use file_list. Need fuzzy path lookup: use path_search."
110120
]
111121
},
112122
"cookbook": [
113123
"1) repositories(limit=20) to discover repo keys",
114124
"2) repo_branches(repo) to discover branch names and freshness",
115125
"3) search({repo, branch, all_terms:[\"term\"]}) for scoped search",
116126
"4) search({repo, branch, historical:true, all_terms:[\"term\"]}) for older snapshots",
117-
"5) search({repo, regex:\"pattern\"}) for regex matching",
118-
"6) file_list(repo, branch, path, depth, limit) for enumeration",
119-
"7) path_search(repo, branch, query) for fuzzy path lookup",
120-
"8) file_content(repo, branch, path, start_line?, end_line?) for raw source text or snippets",
127+
"5) search({repo, regex:\"\\\\bQueryParser\\\\(\"}) for regex matching",
128+
"6) file_list({repo, branch, path:\"src/mcp\", depth:2}) for directory enumeration",
129+
"7) path_search({repo, branch, query:\"mcp serv\"}) for fuzzy path lookup",
130+
"8) file_content({repo, branch, path, start_line?, end_line?}) for raw source text or snippets",
121131
"9) For large files, prefer file_content with line snippets first, then expand only if needed",
122-
"10) symbol_insights(params) for definitions and references",
132+
"10) symbol_insights({params:{...}}) for definitions and references",
123133
"11) OR behavior: search({repo, any_terms:[\"termA\",\"termB\"], dedupe:\"repo_path_line\"})",
124134
"12) For no results, broaden filters and retry per branch"
125135
]
126-
}));
127-
(StatusCode::OK, Json(payload))
136+
})
128137
}
129138

130139
#[derive(Debug, Deserialize, Serialize)]
@@ -190,7 +199,7 @@ async fn mcp_rpc(Json(req): Json<JsonRpcRequest>) -> Response {
190199
"name": "pointer-mcp",
191200
"version": env!("CARGO_PKG_VERSION"),
192201
},
193-
"instructions": "Use tools to query indexed code and symbol information. Operational flow: repositories -> repo_branches -> file_list/path_search -> file_content/search/symbol_insights. Use structured search fields: all_terms are AND semantics and any_terms are OR semantics (fanout + dedupe). For recency/version questions like 'recent change', call repo_branches first, then run search with explicit branch values and compare indexed_at/is_live metadata; add historical:true when historical snapshots should be included. Plain terms do not support wildcard matching; use regex for pattern matching. path_search requires a non-empty query and is not a directory listing endpoint; use file_list for enumeration. For large files, call file_content with start_line/end_line first to limit context size.",
202+
"instructions": "Use tools to query indexed code and symbol information; do not fall back to local filesystem reads for indexed lookup. Operational flow: repositories -> repo_branches -> file_list/path_search -> file_content/search/symbol_insights. search accepts structured JSON fields only; do not send a free-form `query` string. Keep filter values plain: do not include prefixes like `repo:`, `path:`, or `regex:` inside field values. all_terms are AND semantics and any_terms are OR semantics (fanout + dedupe). For recency/version questions like 'recent change', call repo_branches first, then run search with explicit branch values and compare indexed_at/is_live metadata; add historical:true when historical snapshots should be included. Plain terms do not support wildcard matching; use the regex field for pattern matching. path_search requires a non-empty plain fuzzy query and is not a directory listing endpoint; use file_list for enumeration. For large files, call file_content with start_line/end_line first to limit context size.",
194203
});
195204
jsonrpc_result(req.id, result)
196205
}
@@ -322,20 +331,20 @@ fn mcp_tools() -> Vec<Value> {
322331
vec![
323332
json!({
324333
"name": "search",
325-
"description": "Search indexed source code using structured fields (not a free-form DSL string). Use all_terms for AND semantics (all terms must match). Use any_terms for OR semantics (server executes one query per term, then merges/deduplicates using dedupe). Include repo/branch filters for version-aware questions, and set historical:true for older snapshots. path/file are glob-like filters, regex is a content regex filter, and case controls case sensitivity (yes|no|auto). At least one of all_terms, any_terms, or regex is required.",
334+
"description": "Search indexed source code using structured JSON fields only. This tool does not accept a free-form `query` string. Use all_terms for AND semantics (all terms must match). Use any_terms for OR semantics (server executes one query per term, then merges/deduplicates using dedupe). Include repo/branch filters for version-aware questions, and set historical:true for older snapshots. path/file are glob-like filters, regex is a content regex filter, and case controls case sensitivity (yes|no|auto). At least one of all_terms, any_terms, or regex is required. Do not include prefixes like repo:, path:, file:, or regex: inside field values.",
326335
"inputSchema": {
327336
"type": "object",
328337
"properties": {
329-
"repo": { "type": "string" },
330-
"branch": { "type": "string" },
331-
"lang": { "type": "string" },
332-
"path": { "type": "string", "description": "Glob-like path filter" },
333-
"file": { "type": "string", "description": "Glob-like file filter" },
334-
"regex": { "type": "string", "description": "Regex content filter" },
338+
"repo": { "type": "string", "description": "Exact repository key from repositories. Example: \"pointer\"." },
339+
"branch": { "type": "string", "description": "Exact branch name from repo_branches. Example: \"main\"." },
340+
"lang": { "type": "string", "description": "Language filter. Example: \"rust\"." },
341+
"path": { "type": "string", "description": "Glob-like path filter only. Example: \"src/mcp/**\". Do not use this for fuzzy lookup." },
342+
"file": { "type": "string", "description": "Glob-like filename/path filter. Example: \"*.rs\"." },
343+
"regex": { "type": "string", "description": "Content regex pattern only. Do not prefix with `regex:`. JSON-escape backslashes, for example \"\\\\bQueryParser\\\\(\"." },
335344
"case": { "type": "string", "enum": ["yes", "no", "auto"] },
336345
"historical": { "type": "boolean" },
337-
"all_terms": { "type": "array", "items": { "type": "string" } },
338-
"any_terms": { "type": "array", "items": { "type": "string" }, "maxItems": 8 },
346+
"all_terms": { "type": "array", "items": { "type": "string" }, "description": "Literal AND terms. Example: [\"symbol\", \"resolver\"]. Do not wrap these with content:." },
347+
"any_terms": { "type": "array", "items": { "type": "string" }, "maxItems": 8, "description": "Literal OR alternatives. Example: [\"panic\", \"unwrap\"]." },
339348
"page": { "type": "integer", "minimum": 1 },
340349
"dedupe": {
341350
"type": "string",
@@ -344,6 +353,12 @@ fn mcp_tools() -> Vec<Value> {
344353
},
345354
"max_results_per_query": { "type": "integer", "minimum": 1, "maximum": 100 }
346355
},
356+
"examples": [
357+
{ "repo": "pointer", "branch": "main", "all_terms": ["search"] },
358+
{ "repo": "pointer", "branch": "main", "any_terms": ["panic", "unwrap"], "dedupe": "repo_path_line" },
359+
{ "repo": "pointer", "regex": "\\\\bQueryParser\\\\(" },
360+
{ "repo": "pointer", "branch": "main", "historical": true, "all_terms": ["symbol"] }
361+
],
347362
"anyOf": [
348363
{ "required": ["all_terms"] },
349364
{ "required": ["any_terms"] },
@@ -375,47 +390,56 @@ fn mcp_tools() -> Vec<Value> {
375390
}),
376391
json!({
377392
"name": "file_content",
378-
"description": "Read raw indexed file content (no syntax highlighting) for an exact repo/branch/path from the index. Supports optional start_line/end_line (1-based, inclusive) for snippets to reduce context usage. Use this after file_list/path_search to inspect implementation details. Includes branch freshness metadata.",
393+
"description": "Read raw indexed file content (no syntax highlighting) for an exact repo/branch/path from the index. Supports optional start_line/end_line (1-based, inclusive) for snippets to reduce context usage. Use this after file_list/path_search to inspect implementation details; prefer exact paths returned by those tools. Includes branch freshness metadata.",
379394
"inputSchema": {
380395
"type": "object",
381396
"properties": {
382397
"repo": { "type": "string" },
383398
"branch": { "type": "string" },
384-
"path": { "type": "string" },
399+
"path": { "type": "string", "description": "Exact file path returned by file_list or path_search. Example: \"src/mcp/server.rs\"." },
385400
"start_line": { "type": "integer", "minimum": 1, "description": "Optional 1-based inclusive start line for snippet responses." },
386401
"end_line": { "type": "integer", "minimum": 1, "description": "Optional 1-based inclusive end line for snippet responses." }
387402
},
403+
"examples": [
404+
{ "repo": "pointer", "branch": "main", "path": "src/mcp/server.rs", "start_line": 1, "end_line": 40 }
405+
],
388406
"required": ["repo", "branch", "path"],
389407
"additionalProperties": false
390408
}
391409
}),
392410
json!({
393411
"name": "file_list",
394-
"description": "Enumerate files/directories under a path for a repository+branch from the index. Supports bounded recursive traversal with depth and limit. Use this for directory listing workflows and then call file_content on specific files. Response includes truncated flag, branch freshness, and stable paths.",
412+
"description": "Enumerate files/directories under a path for a repository+branch from the index. Supports bounded recursive traversal with depth and limit. Use this for directory listing workflows and then call file_content on specific files. `path` is a directory prefix, not a fuzzy search query. Response includes truncated flag, branch freshness, and stable paths.",
395413
"inputSchema": {
396414
"type": "object",
397415
"properties": {
398416
"repo": { "type": "string" },
399417
"branch": { "type": "string" },
400-
"path": { "type": "string" },
418+
"path": { "type": "string", "description": "Directory prefix to enumerate from. Example: \"src/mcp\". Do not send a fuzzy query here." },
401419
"depth": { "type": "integer", "minimum": 1, "maximum": 10 },
402420
"limit": { "type": "integer", "minimum": 1, "maximum": 5000 }
403421
},
422+
"examples": [
423+
{ "repo": "pointer", "branch": "main", "path": "src/mcp", "depth": 2, "limit": 100 }
424+
],
404425
"required": ["repo", "branch"],
405426
"additionalProperties": false
406427
}
407428
}),
408429
json!({
409430
"name": "path_search",
410-
"description": "Search file and directory paths within a repository and branch using a non-empty query (fuzzy path lookup). This is path-only matching and does not enumerate full directory contents; use file_list for enumeration and file_content for file bodies. Includes freshness metadata.",
431+
"description": "Search file and directory paths within a repository and branch using a non-empty plain fuzzy query (fuzzy path lookup). This is path-only matching and does not enumerate full directory contents; use file_list for enumeration and file_content for file bodies. Do not send filter syntax like `path:` or glob patterns here. Includes freshness metadata.",
411432
"inputSchema": {
412433
"type": "object",
413434
"properties": {
414435
"repo": { "type": "string" },
415436
"branch": { "type": "string" },
416-
"query": { "type": "string" },
437+
"query": { "type": "string", "description": "Plain fuzzy text only. Example: \"mcp serv\". Do not send `path:src/mcp` or glob syntax here." },
417438
"limit": { "type": "integer", "minimum": 1, "maximum": 50 }
418439
},
440+
"examples": [
441+
{ "repo": "pointer", "branch": "main", "query": "mcp serv", "limit": 10 }
442+
],
419443
"required": ["repo", "branch", "query"],
420444
"additionalProperties": false
421445
}
@@ -457,3 +481,39 @@ fn mcp_tools() -> Vec<Value> {
457481
}),
458482
]
459483
}
484+
485+
// Unit tests that pin the wording and shape of the MCP docs payload and of the
// `search` tool schema, so the guidance shown to clients cannot regress silently.
#[cfg(test)]
486+
mod tests {
487+
use super::{mcp_docs_payload, mcp_tools};
488+
489+
/// The docs payload must expose a top-level `search_payload` key and must
/// not expose a `dsl` key.
#[test]
490+
fn docs_payload_uses_structured_search_key() {
491+
let payload = mcp_docs_payload();
492+
assert!(payload.get("search_payload").is_some());
493+
assert!(payload.get("dsl").is_none());
494+
}
495+
496+
/// Finds the tool named `search` in `mcp_tools()` and checks three things:
/// its description carries the free-form-query warning, the `regex` property
/// description mentions JSON-escaping backslashes, and `inputSchema.examples`
/// is a non-empty array.
#[test]
497+
fn search_tool_schema_mentions_structured_only_usage() {
498+
let tools = mcp_tools();
499+
let search_tool = tools
500+
.iter()
501+
.find(|tool| tool.get("name").and_then(|name| name.as_str()) == Some("search"))
502+
.expect("search tool must be present");
503+
504+
// NOTE(review): assuming `Value` is serde_json::Value, indexing a missing key
// yields Null instead of panicking, so the expect() calls below are what report
// a missing or non-string field — confirm.
let description = search_tool["description"]
505+
.as_str()
506+
.expect("search description must be a string");
507+
assert!(description.contains("does not accept a free-form `query` string"));
508+
509+
let regex_description = search_tool["inputSchema"]["properties"]["regex"]["description"]
510+
.as_str()
511+
.expect("regex description must be a string");
512+
assert!(regex_description.contains("JSON-escape backslashes"));
513+
514+
// Only presence is checked here; the example values themselves are not validated.
let examples = search_tool["inputSchema"]["examples"]
515+
.as_array()
516+
.expect("search examples must be present");
517+
assert!(!examples.is_empty());
518+
}
519+
}

0 commit comments

Comments
 (0)