diff --git a/.env.development b/.env.development index 02e961bab..b86e25e8a 100644 --- a/.env.development +++ b/.env.development @@ -76,3 +76,5 @@ SOURCEBOT_TELEMETRY_DISABLED=true # Disables telemetry collection # CONFIG_MAX_REPOS_NO_TOKEN= NODE_ENV=development # SOURCEBOT_TENANCY_MODE=single + +DEBUG_WRITE_CHAT_MESSAGES_TO_FILE=true \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 74d75d72e..d5ba983fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Added `find_symbol_definitions`, and `find_symbol_references` tools to the MCP server. [#1014](https://github.com/sourcebot-dev/sourcebot/pull/1014) +- Added `list_tree` tool to the ask agent. [#1014](https://github.com/sourcebot-dev/sourcebot/pull/1014) +- Added input & output token breakdown in ask details card. [#1014](https://github.com/sourcebot-dev/sourcebot/pull/1014) + +### Fixed +- Fixed issue where ask responses would sometimes appear in the details panel while generating. [#1014](https://github.com/sourcebot-dev/sourcebot/pull/1014) + ## [4.15.9] - 2026-03-17 ### Added diff --git a/packages/shared/src/logger.ts b/packages/shared/src/logger.ts index a3f89e2cc..b142cb07c 100644 --- a/packages/shared/src/logger.ts +++ b/packages/shared/src/logger.ts @@ -32,12 +32,11 @@ const datadogFormat = format((info) => { return info; }); -const humanReadableFormat = printf(({ level, message, timestamp, stack, label: _label }) => { +const humanReadableFormat = printf(({ level, message, timestamp, stack, label: _label, ...rest }) => { const label = `[${_label}] `; - if (stack) { - return `${timestamp} ${level}: ${label}${message}\n${stack}`; - } - return `${timestamp} ${level}: ${label}${message}`; + const extras = Object.keys(rest).length > 0 ? ` ${JSON.stringify(rest)}` : ''; + const base = `${timestamp} ${level}: ${label}${message}${extras}`; + return stack ? `${base}\n${stack}` : base; }); const createLogger = (label: string) => { diff --git a/packages/web/next.config.mjs b/packages/web/next.config.mjs index de1b8b1ee..6211fcfe2 100644 --- a/packages/web/next.config.mjs +++ b/packages/web/next.config.mjs @@ -63,7 +63,14 @@ const nextConfig = { ] }, - turbopack: {}, + turbopack: { + rules: { + '*.txt': { + loaders: ['raw-loader'], + as: '*.js', + }, + }, + }, // @see: https://github.com/vercel/next.js/issues/58019#issuecomment-1910531929 ...(process.env.NODE_ENV === 'development' ? { diff --git a/packages/web/package.json b/packages/web/package.json index 040577ce6..749ea2618 100644 --- a/packages/web/package.json +++ b/packages/web/package.json @@ -144,6 +144,7 @@ "escape-string-regexp": "^5.0.0", "fast-deep-equal": "^3.1.3", "fuse.js": "^7.0.0", + "glob-to-regexp": "^0.4.1", "google-auth-library": "^10.1.0", "graphql": "^16.9.0", "http-status-codes": "^2.3.0", @@ -202,6 +203,7 @@ "@tanstack/eslint-plugin-query": "^5.74.7", "@testing-library/dom": "^10.4.1", "@testing-library/react": "^16.3.0", + "@types/glob-to-regexp": "^0.4.4", "@types/micromatch": "^4.0.9", "@types/node": "^20", "@types/nodemailer": "^6.4.17", @@ -218,6 +220,7 @@ "jsdom": "^25.0.1", "npm-run-all": "^4.1.5", "postcss": "^8", + "raw-loader": "^4.0.2", "react-email": "^5.1.0", "react-grab": "^0.1.23", "tailwindcss": "^3.4.1", diff --git a/packages/web/src/features/chat/agent.ts b/packages/web/src/features/chat/agent.ts index 11b124207..7207ecd63 100644 --- a/packages/web/src/features/chat/agent.ts +++ b/packages/web/src/features/chat/agent.ts @@ -14,10 +14,15 @@ import { } from "ai"; import { randomUUID } from "crypto"; import _dedent from "dedent"; -import { ANSWER_TAG, FILE_REFERENCE_PREFIX, toolNames } from "./constants"; -import { createCodeSearchTool, findSymbolDefinitionsTool, findSymbolReferencesTool, listCommitsTool, listReposTool, readFilesTool } from "./tools"; +import { ANSWER_TAG, FILE_REFERENCE_PREFIX } from "./constants"; +import { findSymbolReferencesDefinition } from "@/features/tools/findSymbolReferences"; +import { findSymbolDefinitionsDefinition } from "@/features/tools/findSymbolDefinitions"; +import { readFileDefinition } from "@/features/tools/readFile"; +import { grepDefinition } from "@/features/tools/grep"; import { Source } from "./types"; import { addLineNumbers, fileReferenceToString } from "./utils"; +import { tools } from "./tools"; +import { listTreeDefinition } from "../tools"; const dedent = _dedent.withOptions({ alignValues: true }); @@ -198,14 +203,7 @@ const createAgentStream = async ({ providerOptions, messages: inputMessages, system: systemPrompt, - tools: { - [toolNames.searchCode]: createCodeSearchTool(selectedRepos), - [toolNames.readFiles]: readFilesTool, - [toolNames.findSymbolReferences]: findSymbolReferencesTool, - [toolNames.findSymbolDefinitions]: findSymbolDefinitionsTool, - [toolNames.listRepos]: listReposTool, - [toolNames.listCommits]: listCommitsTool, - }, + tools, temperature: env.SOURCEBOT_CHAT_MODEL_TEMPERATURE, stopWhen: [ stepCountIsGTE(env.SOURCEBOT_CHAT_MAX_STEP_COUNT), @@ -223,39 +221,46 @@ const createAgentStream = async ({ return; } - if (toolName === toolNames.readFiles) { - output.forEach((file) => { + if (toolName === readFileDefinition.name) { + onWriteSource({ + type: 'file', + repo: output.metadata.repo, + path: output.metadata.path, + revision: output.metadata.revision, + name: output.metadata.path.split('/').pop() ?? output.metadata.path, + }); + } else if (toolName === grepDefinition.name) { + output.metadata.files.forEach((file) => { onWriteSource({ type: 'file', - language: file.language, - repo: file.repository, + repo: file.repo, path: file.path, revision: file.revision, name: file.path.split('/').pop() ?? file.path, }); }); - } else if (toolName === toolNames.searchCode) { - output.files.forEach((file) => { + } else if (toolName === findSymbolDefinitionsDefinition.name || toolName === findSymbolReferencesDefinition.name) { + output.metadata.files.forEach((file) => { onWriteSource({ type: 'file', - language: file.language, - repo: file.repository, + repo: file.repo, path: file.fileName, revision: file.revision, name: file.fileName.split('/').pop() ?? file.fileName, }); }); - } else if (toolName === toolNames.findSymbolDefinitions || toolName === toolNames.findSymbolReferences) { - output.forEach((file) => { - onWriteSource({ - type: 'file', - language: file.language, - repo: file.repository, - path: file.fileName, - revision: file.revision, - name: file.fileName.split('/').pop() ?? file.fileName, + } else if (toolName === listTreeDefinition.name) { + output.metadata.entries + .filter((entry) => entry.type === 'blob') + .forEach((entry) => { + onWriteSource({ + type: 'file', + repo: output.metadata.repo, + path: entry.path, + revision: output.metadata.ref, + name: entry.name, + }); }); - }); } }); }, @@ -312,6 +317,8 @@ const createPrompt = ({ The user has explicitly selected the following repositories for analysis: ${repos.map(repo => `- ${repo}`).join('\n')} + + When calling tools that accept a \`repo\` parameter (e.g. \`read_file\`, \`list_commits\`, \`list_tree\`, \`grep\`), use these repository names exactly as listed above, including the full host prefix (e.g. \`github.com/org/repo\`). ` : ''} diff --git a/packages/web/src/features/chat/components/chatThread/chatThreadListItem.tsx b/packages/web/src/features/chat/components/chatThread/chatThreadListItem.tsx index 1734a04d5..b9c9a65b1 100644 --- a/packages/web/src/features/chat/components/chatThread/chatThreadListItem.tsx +++ b/packages/web/src/features/chat/components/chatThread/chatThreadListItem.tsx @@ -13,8 +13,8 @@ import { AnswerCard } from './answerCard'; import { DetailsCard } from './detailsCard'; import { MarkdownRenderer, REFERENCE_PAYLOAD_ATTRIBUTE } from './markdownRenderer'; import { ReferencedSourcesListView } from './referencedSourcesListView'; -import { uiVisiblePartTypes } from '../../constants'; import isEqual from "fast-deep-equal/react"; +import { ANSWER_TAG } from '../../constants'; interface ChatThreadListItemProps { userMessage: SBChatMessage; @@ -95,19 +95,24 @@ const ChatThreadListItemComponent = forwardRef step // First, filter out any parts that are not text .filter((part) => { - if (part.type !== 'text') { - return true; + if (part.type === 'text') { + return !part.text.includes(ANSWER_TAG); } - return part.text !== answerPart?.text; + return true; }) .filter((part) => { - return uiVisiblePartTypes.includes(part.type); + // Only include text, reasoning, and tool parts + return ( + part.type === 'text' || + part.type === 'reasoning' || + part.type.startsWith('tool-') + ) }) ) // Then, filter out any steps that are empty .filter(step => step.length > 0); - }, [answerPart, assistantMessage?.parts]); + }, [assistantMessage?.parts]); // "thinking" is when the agent is generating output that is not the answer. const isThinking = useMemo(() => { diff --git a/packages/web/src/features/chat/components/chatThread/detailsCard.tsx b/packages/web/src/features/chat/components/chatThread/detailsCard.tsx index ff155ea00..1061acbeb 100644 --- a/packages/web/src/features/chat/components/chatThread/detailsCard.tsx +++ b/packages/web/src/features/chat/components/chatThread/detailsCard.tsx @@ -5,17 +5,18 @@ import { Collapsible, CollapsibleContent, CollapsibleTrigger } from "@/component import { Separator } from '@/components/ui/separator'; import { Skeleton } from '@/components/ui/skeleton'; import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip'; -import { cn } from '@/lib/utils'; +import { cn, getShortenedNumberDisplayString } from '@/lib/utils'; import { Brain, ChevronDown, ChevronRight, Clock, InfoIcon, Loader2, List, ScanSearchIcon, Zap } from 'lucide-react'; import { memo, useCallback } from 'react'; import useCaptureEvent from '@/hooks/useCaptureEvent'; import { MarkdownRenderer } from './markdownRenderer'; import { FindSymbolDefinitionsToolComponent } from './tools/findSymbolDefinitionsToolComponent'; import { FindSymbolReferencesToolComponent } from './tools/findSymbolReferencesToolComponent'; -import { ReadFilesToolComponent } from './tools/readFilesToolComponent'; -import { SearchCodeToolComponent } from './tools/searchCodeToolComponent'; +import { ReadFileToolComponent } from './tools/readFileToolComponent'; +import { GrepToolComponent } from './tools/grepToolComponent'; import { ListReposToolComponent } from './tools/listReposToolComponent'; import { ListCommitsToolComponent } from './tools/listCommitsToolComponent'; +import { ListTreeToolComponent } from './tools/listTreeToolComponent'; import { SBChatMessageMetadata, SBChatMessagePart } from '../../types'; import { SearchScopeIcon } from '../searchScopeIcon'; import isEqual from "fast-deep-equal/react"; @@ -105,15 +106,35 @@ const DetailsCardComponent = ({ )} {metadata?.totalTokens && ( -
- - {metadata?.totalTokens} tokens -
+ + +
+ + {getShortenedNumberDisplayString(metadata.totalTokens, 0)} tokens +
+
+ +
+
+ Input + {metadata.totalInputTokens?.toLocaleString() ?? '—'} +
+
+ Output + {metadata.totalOutputTokens?.toLocaleString() ?? '—'} +
+
+ Total + {metadata.totalTokens.toLocaleString()} +
+
+
+
)} {metadata?.totalResponseTimeMs && (
- {metadata?.totalResponseTimeMs / 1000} seconds + {Math.round(metadata.totalResponseTimeMs / 1000)} seconds
)}
@@ -166,49 +187,65 @@ const DetailsCardComponent = ({ className="text-sm" /> ) - case 'tool-readFiles': + case 'tool-read_file': return ( - ) - case 'tool-searchCode': + case 'tool-grep': return ( - ) - case 'tool-findSymbolDefinitions': + case 'tool-find_symbol_definitions': return ( ) - case 'tool-findSymbolReferences': + case 'tool-find_symbol_references': return ( ) - case 'tool-listRepos': + case 'tool-list_repos': return ( ) - case 'tool-listCommits': + case 'tool-list_commits': return ( ) + case 'tool-list_tree': + return ( + + ) + case 'data-source': + case 'dynamic-tool': + case 'file': + case 'source-document': + case 'source-url': + case 'step-start': + return null; default: + // Guarantees this switch-case to be exhaustive + part satisfies never; return null; } })} diff --git a/packages/web/src/features/chat/components/chatThread/tools/findSymbolDefinitionsToolComponent.tsx b/packages/web/src/features/chat/components/chatThread/tools/findSymbolDefinitionsToolComponent.tsx index 792efd434..00b2e48fe 100644 --- a/packages/web/src/features/chat/components/chatThread/tools/findSymbolDefinitionsToolComponent.tsx +++ b/packages/web/src/features/chat/components/chatThread/tools/findSymbolDefinitionsToolComponent.tsx @@ -1,7 +1,6 @@ 'use client'; import { FindSymbolDefinitionsToolUIPart } from "@/features/chat/tools"; -import { isServiceError } from "@/lib/utils"; import { useMemo, useState } from "react"; import { FileListItem, ToolHeader, TreeList } from "./shared"; import { CodeSnippet } from "@/app/components/codeSnippet"; @@ -29,36 +28,30 @@ export const FindSymbolDefinitionsToolComponent = ({ part }: { part: FindSymbolD
{part.state === 'output-available' && isExpanded && ( <> - {isServiceError(part.output) ? ( + {part.output.metadata.files.length === 0 ? ( + No matches found + ) : ( - Failed with the following error: {part.output.message} + {part.output.metadata.files.map((file) => { + return ( + + ) + })} - ) : ( - <> - {part.output.length === 0 ? ( - No matches found - ) : ( - - {part.output.map((file) => { - return ( - - ) - })} - - )} - )} diff --git a/packages/web/src/features/chat/components/chatThread/tools/findSymbolReferencesToolComponent.tsx b/packages/web/src/features/chat/components/chatThread/tools/findSymbolReferencesToolComponent.tsx index 44dcf763b..c5d4985a0 100644 --- a/packages/web/src/features/chat/components/chatThread/tools/findSymbolReferencesToolComponent.tsx +++ b/packages/web/src/features/chat/components/chatThread/tools/findSymbolReferencesToolComponent.tsx @@ -1,7 +1,6 @@ 'use client'; import { FindSymbolReferencesToolUIPart } from "@/features/chat/tools"; -import { isServiceError } from "@/lib/utils"; import { useMemo, useState } from "react"; import { FileListItem, ToolHeader, TreeList } from "./shared"; import { CodeSnippet } from "@/app/components/codeSnippet"; @@ -29,36 +28,30 @@ export const FindSymbolReferencesToolComponent = ({ part }: { part: FindSymbolRe
{part.state === 'output-available' && isExpanded && ( <> - {isServiceError(part.output) ? ( + {part.output.metadata.files.length === 0 ? ( + No matches found + ) : ( - Failed with the following error: {part.output.message} + {part.output.metadata.files.map((file) => { + return ( + + ) + })} - ) : ( - <> - {part.output.length === 0 ? ( - No matches found - ) : ( - - {part.output.map((file) => { - return ( - - ) - })} - - )} - )} diff --git a/packages/web/src/features/chat/components/chatThread/tools/searchCodeToolComponent.tsx b/packages/web/src/features/chat/components/chatThread/tools/grepToolComponent.tsx similarity index 52% rename from packages/web/src/features/chat/components/chatThread/tools/searchCodeToolComponent.tsx rename to packages/web/src/features/chat/components/chatThread/tools/grepToolComponent.tsx index ca266c599..27d94a518 100644 --- a/packages/web/src/features/chat/components/chatThread/tools/searchCodeToolComponent.tsx +++ b/packages/web/src/features/chat/components/chatThread/tools/grepToolComponent.tsx @@ -1,14 +1,13 @@ 'use client'; -import { SearchCodeToolUIPart } from "@/features/chat/tools"; -import { isServiceError } from "@/lib/utils"; +import { GrepToolUIPart } from "@/features/chat/tools"; import { useMemo, useState } from "react"; import { FileListItem, ToolHeader, TreeList } from "./shared"; import { CodeSnippet } from "@/app/components/codeSnippet"; import { Separator } from "@/components/ui/separator"; import { SearchIcon } from "lucide-react"; -export const SearchCodeToolComponent = ({ part }: { part: SearchCodeToolUIPart }) => { +export const GrepToolComponent = ({ part }: { part: GrepToolUIPart }) => { const [isExpanded, setIsExpanded] = useState(false); const displayQuery = useMemo(() => { @@ -16,7 +15,7 @@ export const SearchCodeToolComponent = ({ part }: { part: SearchCodeToolUIPart } return ''; } - return part.input.query; + return part.input.pattern; }, [part]); const label = useMemo(() => { @@ -35,36 +34,30 @@ export const SearchCodeToolComponent = ({ part }: { part: SearchCodeToolUIPart }
{part.state === 'output-available' && isExpanded && ( <> - {isServiceError(part.output) ? ( + {part.output.metadata.files.length === 0 ? ( + No matches found + ) : ( - Failed with the following error: {part.output.message} + {part.output.metadata.files.map((file) => { + return ( + + ) + })} - ) : ( - <> - {part.output.files.length === 0 ? ( - No matches found - ) : ( - - {part.output.files.map((file) => { - return ( - - ) - })} - - )} - )} diff --git a/packages/web/src/features/chat/components/chatThread/tools/listCommitsToolComponent.tsx b/packages/web/src/features/chat/components/chatThread/tools/listCommitsToolComponent.tsx index f1cc6890e..568840f40 100644 --- a/packages/web/src/features/chat/components/chatThread/tools/listCommitsToolComponent.tsx +++ b/packages/web/src/features/chat/components/chatThread/tools/listCommitsToolComponent.tsx @@ -1,7 +1,6 @@ 'use client'; import { ListCommitsToolUIPart } from "@/features/chat/tools"; -import { isServiceError } from "@/lib/utils"; import { useMemo, useState } from "react"; import { ToolHeader, TreeList } from "./shared"; import { CodeSnippet } from "@/app/components/codeSnippet"; @@ -27,57 +26,51 @@ export const ListCommitsToolComponent = ({ part }: { part: ListCommitsToolUIPart
{part.state === 'output-available' && isExpanded && ( <> - {isServiceError(part.output) ? ( - - Failed with the following error: {part.output.message} - + {part.output.metadata.commits.length === 0 ? ( + No commits found ) : ( - <> - {part.output.commits.length === 0 ? ( - No commits found - ) : ( - -
- Found {part.output.commits.length} of {part.output.totalCount} total commits: -
- {part.output.commits.map((commit) => ( -
-
- -
-
- - {commit.hash.substring(0, 7)} - - {commit.refs && ( - - {commit.refs} - - )} -
-
- {commit.message} -
-
- {commit.author} - - {new Date(commit.date).toLocaleString()} -
-
+ +
+ Found {part.output.metadata.commits.length} of {part.output.metadata.totalCount} total commits: +
+ {part.output.metadata.commits.map((commit) => ( +
+
+ +
+
+ + {commit.hash.substring(0, 7)} + + {commit.refs && ( + + {commit.refs} + + )} +
+
+ {commit.message} +
+
+ {commit.author_name} + + {new Date(commit.date).toLocaleString()}
- ))} - - )} - +
+
+ ))} +
)} diff --git a/packages/web/src/features/chat/components/chatThread/tools/listReposToolComponent.tsx b/packages/web/src/features/chat/components/chatThread/tools/listReposToolComponent.tsx index 3639b598e..d09bb6925 100644 --- a/packages/web/src/features/chat/components/chatThread/tools/listReposToolComponent.tsx +++ b/packages/web/src/features/chat/components/chatThread/tools/listReposToolComponent.tsx @@ -1,10 +1,8 @@ 'use client'; import { ListReposToolUIPart } from "@/features/chat/tools"; -import { isServiceError } from "@/lib/utils"; import { useMemo, useState } from "react"; import { ToolHeader, TreeList } from "./shared"; -import { CodeSnippet } from "@/app/components/codeSnippet"; import { Separator } from "@/components/ui/separator"; import { FolderOpenIcon } from "lucide-react"; @@ -27,36 +25,30 @@ export const ListReposToolComponent = ({ part }: { part: ListReposToolUIPart })
{part.state === 'output-available' && isExpanded && ( <> - {isServiceError(part.output) ? ( + {part.output.metadata.repos.length === 0 ? ( + No repositories found + ) : ( - Failed with the following error: {part.output.message} +
+ Found {part.output.metadata.repos.length} of {part.output.metadata.totalCount} repositories: +
+ {part.output.metadata.repos.map((repo, index) => ( +
+ + {repo.name} +
+ ))}
- ) : ( - <> - {part.output.length === 0 ? ( - No repositories found - ) : ( - -
- Found {part.output.length} repositories: -
- {part.output.map((repoName, index) => ( -
- - {repoName} -
- ))} -
- )} - )} diff --git a/packages/web/src/features/chat/components/chatThread/tools/listTreeToolComponent.tsx b/packages/web/src/features/chat/components/chatThread/tools/listTreeToolComponent.tsx new file mode 100644 index 000000000..323ad8e93 --- /dev/null +++ b/packages/web/src/features/chat/components/chatThread/tools/listTreeToolComponent.tsx @@ -0,0 +1,61 @@ +'use client'; + +import { ListTreeToolUIPart } from "@/features/chat/tools"; +import { useMemo, useState } from "react"; +import { ToolHeader, TreeList } from "./shared"; +import { Separator } from "@/components/ui/separator"; +import { FileIcon, FolderIcon } from "lucide-react"; + +export const ListTreeToolComponent = ({ part }: { part: ListTreeToolUIPart }) => { + const [isExpanded, setIsExpanded] = useState(false); + + const label = useMemo(() => { + switch (part.state) { + case 'input-streaming': + return 'Listing directory tree...'; + case 'output-error': + return '"List tree" tool call failed'; + case 'input-available': + case 'output-available': + return 'Listed directory tree'; + } + }, [part]); + + return ( +
+ + {part.state === 'output-available' && isExpanded && ( + <> + {part.output.metadata.entries.length === 0 ? ( + No entries found + ) : ( + +
+ {part.output.metadata.repo} - {part.output.metadata.path || '/'} ({part.output.metadata.totalReturned} entries{part.output.metadata.truncated ? ', truncated' : ''}) +
+ {part.output.metadata.entries.map((entry, index) => ( +
+ {entry.type === 'tree' + ? + : + } + {entry.name} +
+ ))} +
+ )} + + + )} +
+ ); +}; diff --git a/packages/web/src/features/chat/components/chatThread/tools/readFileToolComponent.tsx b/packages/web/src/features/chat/components/chatThread/tools/readFileToolComponent.tsx new file mode 100644 index 000000000..36042f408 --- /dev/null +++ b/packages/web/src/features/chat/components/chatThread/tools/readFileToolComponent.tsx @@ -0,0 +1,53 @@ +'use client'; + +import { Separator } from "@/components/ui/separator"; +import { ReadFileToolUIPart } from "@/features/chat/tools"; +import { EyeIcon } from "lucide-react"; +import { useMemo, useState } from "react"; +import { FileListItem, ToolHeader, TreeList } from "./shared"; + +export const ReadFileToolComponent = ({ part }: { part: ReadFileToolUIPart }) => { + const [isExpanded, setIsExpanded] = useState(false); + + const label = useMemo(() => { + switch (part.state) { + case 'input-streaming': + return 'Reading...'; + case 'input-available': + return `Reading ${part.input.path}...`; + case 'output-error': + return 'Tool call failed'; + case 'output-available': + if (part.output.metadata.isTruncated || part.output.metadata.startLine > 1) { + return `Read ${part.output.metadata.path} (lines ${part.output.metadata.startLine}–${part.output.metadata.endLine})`; + } + return `Read ${part.output.metadata.path}`; + } + }, [part]); + + return ( +
+ + {part.state === 'output-available' && isExpanded && ( + <> + + + + + + )} +
+ ) +} diff --git a/packages/web/src/features/chat/components/chatThread/tools/readFilesToolComponent.tsx b/packages/web/src/features/chat/components/chatThread/tools/readFilesToolComponent.tsx deleted file mode 100644 index a31ae75b4..000000000 --- a/packages/web/src/features/chat/components/chatThread/tools/readFilesToolComponent.tsx +++ /dev/null @@ -1,60 +0,0 @@ -'use client'; - -import { CodeSnippet } from "@/app/components/codeSnippet"; -import { Separator } from "@/components/ui/separator"; -import { ReadFilesToolUIPart } from "@/features/chat/tools"; -import { isServiceError } from "@/lib/utils"; -import { EyeIcon } from "lucide-react"; -import { useMemo, useState } from "react"; -import { FileListItem, ToolHeader, TreeList } from "./shared"; - -export const ReadFilesToolComponent = ({ part }: { part: ReadFilesToolUIPart }) => { - const [isExpanded, setIsExpanded] = useState(false); - - const label = useMemo(() => { - switch (part.state) { - case 'input-streaming': - return 'Reading...'; - case 'input-available': - return `Reading ${part.input.paths.length} file${part.input.paths.length === 1 ? '' : 's'}...`; - case 'output-error': - return 'Tool call failed'; - case 'output-available': - if (isServiceError(part.output)) { - return 'Failed to read files'; - } - return `Read ${part.output.length} file${part.output.length === 1 ? '' : 's'}`; - } - }, [part]); - - return ( -
- - {part.state === 'output-available' && isExpanded && ( - <> - - {isServiceError(part.output) ? ( - Failed with the following error: {part.output.message} - ) : part.output.map((file) => { - return ( - - ) - })} - - - - )} -
- ) -} diff --git a/packages/web/src/features/chat/components/chatThread/tools/shared.tsx b/packages/web/src/features/chat/components/chatThread/tools/shared.tsx index 92c2bf3fa..77c559897 100644 --- a/packages/web/src/features/chat/components/chatThread/tools/shared.tsx +++ b/packages/web/src/features/chat/components/chatThread/tools/shared.tsx @@ -1,6 +1,7 @@ 'use client'; import { VscodeFileIcon } from '@/app/components/vscodeFileIcon'; +import { CopyIconButton } from '@/app/[domain]/components/copyIconButton'; import { ScrollArea } from '@/components/ui/scroll-area'; import { cn } from '@/lib/utils'; import { ChevronDown, ChevronRight, Loader2 } from 'lucide-react'; @@ -82,15 +83,27 @@ interface ToolHeaderProps { label: React.ReactNode; Icon: React.ElementType; onExpand: (isExpanded: boolean) => void; + input?: string; + output?: string; className?: string; } -export const ToolHeader = ({ isLoading, isError, isExpanded, label, Icon, onExpand, className }: ToolHeaderProps) => { +export const ToolHeader = ({ isLoading, isError, isExpanded, label, Icon, onExpand, input, output, className }: ToolHeaderProps) => { + const onCopy = output !== undefined + ? () => { + const text = [ + input !== undefined ? `Input:\n${input}` : null, + `Output:\n${output}`, + ].filter(Boolean).join('\n\n'); + navigator.clipboard.writeText(text); + return true; + } + : undefined; return (
)} {label} + {onCopy && ( +
e.stopPropagation()}> + +
+ )} {!isLoading && ( -
+
{isExpanded ? ( ) : ( diff --git a/packages/web/src/features/chat/constants.ts b/packages/web/src/features/chat/constants.ts index aca101a3c..b2e5de742 100644 --- a/packages/web/src/features/chat/constants.ts +++ b/packages/web/src/features/chat/constants.ts @@ -1,5 +1,3 @@ -import { SBChatMessagePart } from "./types"; - export const FILE_REFERENCE_PREFIX = '@file:'; export const FILE_REFERENCE_REGEX = new RegExp( // @file:{repoName::fileName:startLine-endLine} @@ -8,24 +6,3 @@ export const FILE_REFERENCE_REGEX = new RegExp( ); export const ANSWER_TAG = ''; - -export const toolNames = { - searchCode: 'searchCode', - readFiles: 'readFiles', - findSymbolReferences: 'findSymbolReferences', - findSymbolDefinitions: 'findSymbolDefinitions', - listRepos: 'listRepos', - listCommits: 'listCommits', -} as const; - -// These part types are visible in the UI. -export const uiVisiblePartTypes: SBChatMessagePart['type'][] = [ - 'reasoning', - 'text', - 'tool-searchCode', - 'tool-readFiles', - 'tool-findSymbolDefinitions', - 'tool-findSymbolReferences', - 'tool-listRepos', - 'tool-listCommits', -] as const; \ No newline at end of file diff --git a/packages/web/src/features/chat/logger.ts b/packages/web/src/features/chat/logger.ts new file mode 100644 index 000000000..bbd1b7001 --- /dev/null +++ b/packages/web/src/features/chat/logger.ts @@ -0,0 +1,3 @@ +import { createLogger } from "@sourcebot/shared"; + +export const logger = createLogger('ask-agent'); \ No newline at end of file diff --git a/packages/web/src/features/chat/tools.ts b/packages/web/src/features/chat/tools.ts index 713f31722..dfd031e7d 100644 --- a/packages/web/src/features/chat/tools.ts +++ b/packages/web/src/features/chat/tools.ts @@ -1,312 +1,30 @@ -import { z } from "zod" -import { search } from "@/features/search" -import { InferToolInput, InferToolOutput, InferUITool, tool, ToolUIPart } from "ai"; -import { isServiceError } from "@/lib/utils"; -import { FileSourceResponse, getFileSource, listCommits } from '@/features/git'; -import { findSearchBasedSymbolDefinitions, findSearchBasedSymbolReferences } from "../codeNav/api"; -import { addLineNumbers } from "./utils"; -import { toolNames } from "./constants"; -import { listReposQueryParamsSchema } from "@/lib/schemas"; -import { ListReposQueryParams } from "@/lib/types"; -import { listRepos } from "@/app/api/(server)/repos/listReposApi"; -import escapeStringRegexp from "escape-string-regexp"; - -// @NOTE: When adding a new tool, follow these steps: -// 1. Add the tool to the `toolNames` constant in `constants.ts`. -// 2. Add the tool to the `SBChatMessageToolTypes` type in `types.ts`. -// 3. Add the tool to the `tools` prop in `agent.ts`. -// 4. If the tool is meant to be rendered in the UI: -// - Add the tool to the `uiVisiblePartTypes` constant in `constants.ts`. -// - Add the tool's component to the `DetailsCard` switch statement in `detailsCard.tsx`. -// -// - bk, 2025-07-25 - - -export const findSymbolReferencesTool = tool({ - description: `Finds references to a symbol in the codebase.`, - inputSchema: z.object({ - symbol: z.string().describe("The symbol to find references to"), - language: z.string().describe("The programming language of the symbol"), - repository: z.string().describe("The repository to scope the search to").optional(), - }), - execute: async ({ symbol, language, repository }) => { - // @todo: make revision configurable. - const revision = "HEAD"; - - const response = await findSearchBasedSymbolReferences({ - symbolName: symbol, - language, - revisionName: "HEAD", - repoName: repository, - }); - - if (isServiceError(response)) { - return response; - } - - return response.files.map((file) => ({ - fileName: file.fileName, - repository: file.repository, - language: file.language, - matches: file.matches.map(({ lineContent, range }) => { - return addLineNumbers(lineContent, range.start.lineNumber); - }), - revision, - })); - }, -}); - -export type FindSymbolReferencesTool = InferUITool; -export type FindSymbolReferencesToolInput = InferToolInput; -export type FindSymbolReferencesToolOutput = InferToolOutput; -export type FindSymbolReferencesToolUIPart = ToolUIPart<{ [toolNames.findSymbolReferences]: FindSymbolReferencesTool }> - -export const findSymbolDefinitionsTool = tool({ - description: `Finds definitions of a symbol in the codebase.`, - inputSchema: z.object({ - symbol: z.string().describe("The symbol to find definitions of"), - language: z.string().describe("The programming language of the symbol"), - repository: z.string().describe("The repository to scope the search to").optional(), - }), - execute: async ({ symbol, language, repository }) => { - // @todo: make revision configurable. - const revision = "HEAD"; - - const response = await findSearchBasedSymbolDefinitions({ - symbolName: symbol, - language, - revisionName: revision, - repoName: repository, - }); - - if (isServiceError(response)) { - return response; - } - - return response.files.map((file) => ({ - fileName: file.fileName, - repository: file.repository, - language: file.language, - matches: file.matches.map(({ lineContent, range }) => { - return addLineNumbers(lineContent, range.start.lineNumber); - }), - revision, - })); - } -}); - -export type FindSymbolDefinitionsTool = InferUITool; -export type FindSymbolDefinitionsToolInput = InferToolInput; -export type FindSymbolDefinitionsToolOutput = InferToolOutput; -export type FindSymbolDefinitionsToolUIPart = ToolUIPart<{ [toolNames.findSymbolDefinitions]: FindSymbolDefinitionsTool }> - -export const readFilesTool = tool({ - description: `Reads the contents of multiple files at the given paths.`, - inputSchema: z.object({ - paths: z.array(z.string()).describe("The paths to the files to read"), - repository: z.string().describe("The repository to read the files from"), - }), - execute: async ({ paths, repository }) => { - // @todo: make revision configurable. - const revision = "HEAD"; - - const responses = await Promise.all(paths.map(async (path) => { - return getFileSource({ - path, - repo: repository, - ref: revision, - }, { source: 'sourcebot-ask-agent' }); - })); - - if (responses.some(isServiceError)) { - const firstError = responses.find(isServiceError); - return firstError!; - } - - return (responses as FileSourceResponse[]).map((response) => ({ - path: response.path, - repository: response.repo, - language: response.language, - source: addLineNumbers(response.source), - revision, - })); - } -}); - -export type ReadFilesTool = InferUITool; -export type ReadFilesToolInput = InferToolInput; -export type ReadFilesToolOutput = InferToolOutput; -export type ReadFilesToolUIPart = ToolUIPart<{ [toolNames.readFiles]: ReadFilesTool }> - -const DEFAULT_SEARCH_LIMIT = 100; - -export const createCodeSearchTool = (selectedRepos: string[]) => tool({ - description: `Searches for code that matches the provided search query as a substring by default, or as a regular expression if useRegex is true. Useful for exploring remote repositories by searching for exact symbols, functions, variables, or specific code patterns. To determine if a repository is indexed, use the \`listRepos\` tool. By default, searches are global and will search the default branch of all repositories. Searches can be scoped to specific repositories, languages, and branches.`, - inputSchema: z.object({ - query: z - .string() - .describe(`The search pattern to match against code contents. Do not escape quotes in your query.`) - // Escape backslashes first, then quotes, and wrap in double quotes - // so the query is treated as a literal phrase (like grep). - .transform((val) => { - const escaped = val.replace(/\\/g, '\\\\').replace(/"/g, '\\"'); - return `"${escaped}"`; - }), - useRegex: z - .boolean() - .describe(`Whether to use regular expression matching to match the search query against code contents. When false, substring matching is used. (default: false)`) - .optional(), - filterByRepos: z - .array(z.string()) - .describe(`Scope the search to the provided repositories.`) - .optional(), - filterByLanguages: z - .array(z.string()) - .describe(`Scope the search to the provided languages.`) - .optional(), - filterByFilepaths: z - .array(z.string()) - .describe(`Scope the search to the provided filepaths. Each filepath is a regular expression matched against the full file path.`) - .optional(), - caseSensitive: z - .boolean() - .describe(`Whether the search should be case sensitive (default: false).`) - .optional(), - ref: z - .string() - .describe(`Commit SHA, branch or tag name to search on. If not provided, defaults to the default branch (usually 'main' or 'master').`) - .optional(), - limit: z - .number() - .default(DEFAULT_SEARCH_LIMIT) - .describe(`Maximum number of matches to return (default: ${DEFAULT_SEARCH_LIMIT})`) - .optional(), - }), - execute: async ({ - query, - useRegex = false, - filterByRepos: repos = [], - filterByLanguages: languages = [], - filterByFilepaths: filepaths = [], - caseSensitive = false, - ref, - limit = DEFAULT_SEARCH_LIMIT, - }) => { - - if (selectedRepos.length > 0) { - query += ` reposet:${selectedRepos.join(',')}`; - } - - if (repos.length > 0) { - query += ` (repo:${repos.map(id => escapeStringRegexp(id)).join(' or repo:')})`; - } - - if (languages.length > 0) { - query += ` (lang:${languages.join(' or lang:')})`; - } - - if (filepaths.length > 0) { - query += ` (file:${filepaths.join(' or file:')})`; - } - - if (ref) { - query += ` (rev:${ref})`; - } - - const response = await search({ - queryType: 'string', - query, - options: { - matches: limit, - contextLines: 3, - isCaseSensitivityEnabled: caseSensitive, - isRegexEnabled: useRegex, - }, - source: 'sourcebot-ask-agent', - }); - - if (isServiceError(response)) { - return response; - } - - return { - files: response.files.map((file) => ({ - fileName: file.fileName.text, - repository: file.repository, - language: file.language, - matches: file.chunks.map(({ content, contentStart }) => { - return addLineNumbers(content, contentStart.lineNumber); - }), - // @todo: make revision configurable. - revision: 'HEAD', - })), - query, - } - }, -}); - -export type SearchCodeTool = InferUITool>; -export type SearchCodeToolInput = InferToolInput>; -export type SearchCodeToolOutput = InferToolOutput>; -export type SearchCodeToolUIPart = ToolUIPart<{ [toolNames.searchCode]: SearchCodeTool }>; - -export const listReposTool = tool({ - description: 'Lists repositories in the organization with optional filtering and pagination.', - inputSchema: listReposQueryParamsSchema, - execute: async (request: ListReposQueryParams) => { - const reposResponse = await listRepos({ ...request, source: 'sourcebot-ask-agent' }); - - if (isServiceError(reposResponse)) { - return reposResponse; - } - - return reposResponse.data.map((repo) => repo.repoName); - } -}); - -export type ListReposTool = InferUITool; -export type ListReposToolInput = InferToolInput; -export type ListReposToolOutput = InferToolOutput; -export type ListReposToolUIPart = ToolUIPart<{ [toolNames.listRepos]: ListReposTool }>; - -export const listCommitsTool = tool({ - description: 'Lists commits in a repository with optional filtering by date range, author, and commit message.', - inputSchema: z.object({ - repository: z.string().describe("The repository to list commits from"), - query: z.string().describe("Search query to filter commits by message (case-insensitive)").optional(), - since: z.string().describe("Start date for commit range (e.g., '30 days ago', '2024-01-01', 'last week')").optional(), - until: z.string().describe("End date for commit range (e.g., 'yesterday', '2024-12-31', 'today')").optional(), - author: z.string().describe("Filter commits by author name or email (case-insensitive)").optional(), - maxCount: z.number().describe("Maximum number of commits to return (default: 50)").optional(), - }), - execute: async ({ repository, query, since, until, author, maxCount }) => { - const response = await listCommits({ - repo: repository, - query, - since, - until, - author, - maxCount, - }); - - if (isServiceError(response)) { - return response; - } - - return { - commits: response.commits.map((commit) => ({ - hash: commit.hash, - date: commit.date, - message: commit.message, - author: `${commit.author_name} <${commit.author_email}>`, - refs: commit.refs, - })), - totalCount: response.totalCount, - }; - } -}); - -export type ListCommitsTool = InferUITool; -export type ListCommitsToolInput = InferToolInput; -export type ListCommitsToolOutput = InferToolOutput; -export type ListCommitsToolUIPart = ToolUIPart<{ [toolNames.listCommits]: ListCommitsTool }>; +import { + toVercelAITool, + readFileDefinition, + listCommitsDefinition, + listReposDefinition, + grepDefinition, + findSymbolReferencesDefinition, + findSymbolDefinitionsDefinition, + listTreeDefinition, +} from "@/features/tools"; +import { ToolUIPart } from "ai"; +import { SBChatMessageToolTypes } from "./types"; + +export const tools = { + [readFileDefinition.name]: toVercelAITool(readFileDefinition), + [listCommitsDefinition.name]: toVercelAITool(listCommitsDefinition), + [listReposDefinition.name]: toVercelAITool(listReposDefinition), + [grepDefinition.name]: toVercelAITool(grepDefinition), + [findSymbolReferencesDefinition.name]: toVercelAITool(findSymbolReferencesDefinition), + [findSymbolDefinitionsDefinition.name]: toVercelAITool(findSymbolDefinitionsDefinition), + [listTreeDefinition.name]: toVercelAITool(listTreeDefinition), +} as const; + +export type ReadFileToolUIPart = ToolUIPart<{ read_file: SBChatMessageToolTypes['read_file'] }>; +export type ListCommitsToolUIPart = ToolUIPart<{ list_commits: SBChatMessageToolTypes['list_commits'] }>; +export type ListReposToolUIPart = ToolUIPart<{ list_repos: SBChatMessageToolTypes['list_repos'] }>; +export type GrepToolUIPart = ToolUIPart<{ grep: SBChatMessageToolTypes['grep'] }>; +export type FindSymbolReferencesToolUIPart = ToolUIPart<{ find_symbol_references: SBChatMessageToolTypes['find_symbol_references'] }>; +export type FindSymbolDefinitionsToolUIPart = ToolUIPart<{ find_symbol_definitions: SBChatMessageToolTypes['find_symbol_definitions'] }>; +export type ListTreeToolUIPart = ToolUIPart<{ list_tree: SBChatMessageToolTypes['list_tree'] }>; diff --git a/packages/web/src/features/chat/types.ts b/packages/web/src/features/chat/types.ts index 9411f850f..a9923f58b 100644 --- a/packages/web/src/features/chat/types.ts +++ b/packages/web/src/features/chat/types.ts @@ -1,18 +1,16 @@ -import { CreateUIMessage, UIMessage, UIMessagePart } from "ai"; +import { CreateUIMessage, InferUITool, UIMessage, UIMessagePart } from "ai"; import { BaseEditor, Descendant } from "slate"; import { HistoryEditor } from "slate-history"; import { ReactEditor, RenderElementProps } from "slate-react"; import { z } from "zod"; -import { FindSymbolDefinitionsTool, FindSymbolReferencesTool, ReadFilesTool, SearchCodeTool, ListReposTool, ListCommitsTool } from "./tools"; -import { toolNames } from "./constants"; import { LanguageModel } from "@sourcebot/schemas/v3/index.type"; +import { tools } from "./tools"; const fileSourceSchema = z.object({ type: z.literal('file'), repo: z.string(), path: z.string(), name: z.string(), - language: z.string(), revision: z.string(), }); export type FileSource = z.infer; @@ -79,13 +77,8 @@ export const sbChatMessageMetadataSchema = z.object({ export type SBChatMessageMetadata = z.infer; export type SBChatMessageToolTypes = { - [toolNames.searchCode]: SearchCodeTool, - [toolNames.readFiles]: ReadFilesTool, - [toolNames.findSymbolReferences]: FindSymbolReferencesTool, - [toolNames.findSymbolDefinitions]: FindSymbolDefinitionsTool, - [toolNames.listRepos]: ListReposTool, - [toolNames.listCommits]: ListCommitsTool, -} + [K in keyof typeof tools]: InferUITool; +}; export type SBChatMessageDataParts = { // The `source` data type allows us to know what sources the LLM saw diff --git a/packages/web/src/features/chat/utils.ts b/packages/web/src/features/chat/utils.ts index f77325c4d..a1c9fd9f4 100644 --- a/packages/web/src/features/chat/utils.ts +++ b/packages/web/src/features/chat/utils.ts @@ -174,7 +174,7 @@ export const resetEditor = (editor: CustomEditor) => { } export const addLineNumbers = (source: string, lineOffset = 1) => { - return source.split('\n').map((line, index) => `${index + lineOffset}:${line}`).join('\n'); + return source.split('\n').map((line, index) => `${index + lineOffset}: ${line}`).join('\n'); } export const createUIMessage = (text: string, mentions: MentionData[], selectedSearchScopes: SearchScope[]): CreateUIMessage => { @@ -187,7 +187,6 @@ export const createUIMessage = (text: string, mentions: MentionData[], selectedS path: mention.path, repo: mention.repo, name: mention.name, - language: mention.language, revision: mention.revision, } return fileSource; @@ -338,8 +337,13 @@ export const getAnswerPartFromAssistantMessage = (message: SBChatMessage, isStre const lastTextPart = message.parts .findLast((part) => part.type === 'text') - if (lastTextPart?.text.startsWith(ANSWER_TAG)) { - return lastTextPart; + if (lastTextPart?.text.includes(ANSWER_TAG)) { + const answerIndex = lastTextPart.text.indexOf(ANSWER_TAG); + const answer = lastTextPart.text.substring(answerIndex + ANSWER_TAG.length); + return { + ...lastTextPart, + text: answer + }; } // If the agent did not include the answer tag, then fallback to using the last text part. diff --git a/packages/web/src/features/mcp/server.ts b/packages/web/src/features/mcp/server.ts index 3fe3c50f7..017f0fe69 100644 --- a/packages/web/src/features/mcp/server.ts +++ b/packages/web/src/features/mcp/server.ts @@ -1,509 +1,84 @@ -import { listRepos } from '@/app/api/(server)/repos/listReposApi'; -import { getConfiguredLanguageModelsInfo } from "../chat/utils.server"; -import { askCodebase } from '@/features/mcp/askCodebase'; import { languageModelInfoSchema, } from '@/features/chat/types'; -import { getFileSource, getTree, listCommits } from '@/features/git'; -import { search } from '@/features/search/searchApi'; +import { askCodebase } from '@/features/mcp/askCodebase'; import { isServiceError } from '@/lib/utils'; import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { ChatVisibility } from '@sourcebot/db'; import { SOURCEBOT_VERSION } from '@sourcebot/shared'; import _dedent from 'dedent'; -import escapeStringRegexp from 'escape-string-regexp'; import { z } from 'zod'; +import { getConfiguredLanguageModelsInfo } from "../chat/utils.server"; import { - ListTreeEntry, - TextContent, -} from './types'; -import { buildTreeNodeIndex, joinTreePath, normalizeTreePath, sortTreeEntries } from './utils'; + findSymbolDefinitionsDefinition, + findSymbolReferencesDefinition, + listCommitsDefinition, + listReposDefinition, + listTreeDefinition, + readFileDefinition, + registerMcpTool, + grepDefinition, +} from '../tools'; const dedent = _dedent.withOptions({ alignValues: true }); -const DEFAULT_MINIMUM_TOKENS = 10000; -const DEFAULT_MATCHES = 10000; -const DEFAULT_CONTEXT_LINES = 5; - -const DEFAULT_TREE_DEPTH = 1; -const MAX_TREE_DEPTH = 10; -const DEFAULT_MAX_TREE_ENTRIES = 1000; -const MAX_MAX_TREE_ENTRIES = 10000; - -const TOOL_DESCRIPTIONS = { - search_code: dedent` - Searches for code that matches the provided search query as a substring by default, or as a regular expression if useRegex is true. Useful for exploring remote repositories by - searching for exact symbols, functions, variables, or specific code patterns. - - To determine if a repository is indexed, use the \`list_repos\` tool. By default, searches are global and will search the default branch of all repositories. Searches can be - scoped to specific repositories, languages, and branches. - - When referencing code outputted by this tool, always include the file's external URL as a link. This makes it easier for the user to view the file, even if they don't have it locally checked out. - `, - list_commits: dedent`Get a list of commits for a given repository.`, - list_repos: dedent`Lists repositories in the organization with optional filtering and pagination.`, - read_file: dedent`Reads the source code for a given file.`, - list_tree: dedent` - Lists files and directories from a repository path. This can be used as a repo tree tool or directory listing tool. - Returns a flat list of entries with path metadata and depth relative to the requested path. - `, - list_language_models: dedent`Lists the available language models configured on the Sourcebot instance. Use this to discover which models can be specified when calling ask_codebase.`, - ask_codebase: dedent` - DO NOT USE THIS TOOL UNLESS EXPLICITLY ASKED TO. THE PROMPT MUST SPECIFICALLY ASK TO USE THE ask_codebase TOOL. - - Ask a natural language question about the codebase. This tool uses an AI agent to autonomously search code, read files, and find symbol references/definitions to answer your question. - - This is a blocking operation that may take 60+ seconds to research the codebase, so only invoke it if the user has explicitly asked you to by specifying the ask_codebase tool call in the prompt. - - The agent will: - - Analyze your question and determine what context it needs - - Search the codebase using multiple strategies (code search, symbol lookup, file reading) - - Synthesize findings into a comprehensive answer with code references - - Returns a detailed answer in markdown format with code references, plus a link to view the full research session (including all tool calls and reasoning) in the Sourcebot web UI. - - When using this in shared environments (e.g., Slack), you can set the visibility parameter to 'PUBLIC' to ensure everyone can access the chat link. - `, -}; - export function createMcpServer(): McpServer { const server = new McpServer({ name: 'sourcebot-mcp-server', version: SOURCEBOT_VERSION, }); - server.registerTool( - "search_code", - { - description: TOOL_DESCRIPTIONS.search_code, - annotations: { readOnlyHint: true }, - inputSchema: { - query: z - .string() - .describe(`The search pattern to match against code contents. Do not escape quotes in your query.`) - .transform((val) => { - const escaped = val.replace(/\\/g, '\\\\').replace(/"/g, '\\"'); - return `"${escaped}"`; - }), - useRegex: z - .boolean() - .describe(`Whether to use regular expression matching. When false, substring matching is used. (default: false)`) - .optional(), - filterByRepos: z - .array(z.string()) - .describe(`Scope the search to the provided repositories.`) - .optional(), - filterByLanguages: z - .array(z.string()) - .describe(`Scope the search to the provided languages.`) - .optional(), - filterByFilepaths: z - .array(z.string()) - .describe(`Scope the search to the provided filepaths. Each filepath is a regular expression matched against the full file path.`) - .optional(), - caseSensitive: z - .boolean() - .describe(`Whether the search should be case sensitive (default: false).`) - .optional(), - includeCodeSnippets: z - .boolean() - .describe(`Whether to include code snippets in the response. If false, only the file's URL, repository, and language will be returned. (default: false)`) - .optional(), - ref: z - .string() - .describe(`Commit SHA, branch or tag name to search on. If not provided, defaults to the default branch.`) - .optional(), - maxTokens: z - .number() - .describe(`The maximum number of tokens to return (default: ${DEFAULT_MINIMUM_TOKENS}).`) - .transform((val) => (val < DEFAULT_MINIMUM_TOKENS ? DEFAULT_MINIMUM_TOKENS : val)) - .optional(), - }, - }, - async ({ - query, - filterByRepos: repos = [], - filterByLanguages: languages = [], - filterByFilepaths: filepaths = [], - maxTokens = DEFAULT_MINIMUM_TOKENS, - includeCodeSnippets = false, - caseSensitive = false, - ref, - useRegex = false, - }: { - query: string; - useRegex?: boolean; - filterByRepos?: string[]; - filterByLanguages?: string[]; - filterByFilepaths?: string[]; - caseSensitive?: boolean; - includeCodeSnippets?: boolean; - ref?: string; - maxTokens?: number; - }) => { - if (repos.length > 0) { - query += ` (repo:${repos.map(id => escapeStringRegexp(id)).join(' or repo:')})`; - } - if (languages.length > 0) { - query += ` (lang:${languages.join(' or lang:')})`; - } - if (filepaths.length > 0) { - query += ` (file:${filepaths.join(' or file:')})`; - } - if (ref) { - query += ` ( rev:${ref} )`; - } - - const response = await search({ - queryType: 'string', - query, - options: { - matches: DEFAULT_MATCHES, - contextLines: DEFAULT_CONTEXT_LINES, - isRegexEnabled: useRegex, - isCaseSensitivityEnabled: caseSensitive, - }, - source: 'mcp', - }); - - if (isServiceError(response)) { - return { - content: [{ type: "text", text: `Search failed: ${response.message}` }], - }; - } - - if (response.files.length === 0) { - return { - content: [{ type: "text", text: `No results found for the query: ${query}` }], - }; - } - - const content: TextContent[] = []; - let totalTokens = 0; - let isResponseTruncated = false; - - for (const file of response.files) { - const numMatches = file.chunks.reduce((acc, chunk) => acc + chunk.matchRanges.length, 0); - let text = dedent` - file: ${file.webUrl} - num_matches: ${numMatches} - repo: ${file.repository} - language: ${file.language} - `; - - if (includeCodeSnippets) { - const snippets = file.chunks.map(chunk => `\`\`\`\n${chunk.content}\n\`\`\``).join('\n'); - text += `\n\n${snippets}`; - } - - const tokens = text.length / 4; - - if ((totalTokens + tokens) > maxTokens) { - const remainingTokens = maxTokens - totalTokens; - if (remainingTokens > 100) { - const maxLength = Math.floor(remainingTokens * 4); - content.push({ - type: "text", - text: text.substring(0, maxLength) + "\n\n...[content truncated due to token limit]", - }); - totalTokens += remainingTokens; - } - isResponseTruncated = true; - break; - } - - totalTokens += tokens; - content.push({ type: "text", text }); - } - - if (isResponseTruncated) { - content.push({ - type: "text", - text: `The response was truncated because the number of tokens exceeded the maximum limit of ${maxTokens}.`, - }); - } - - return { content }; - } - ); + registerMcpTool(server, grepDefinition); + registerMcpTool(server, listCommitsDefinition); + registerMcpTool(server, listReposDefinition); + registerMcpTool(server, readFileDefinition); + registerMcpTool(server, listTreeDefinition); + registerMcpTool(server, findSymbolDefinitionsDefinition); + registerMcpTool(server, findSymbolReferencesDefinition); server.registerTool( - "list_commits", + "list_language_models", { - description: TOOL_DESCRIPTIONS.list_commits, - annotations: { readOnlyHint: true }, - inputSchema: z.object({ - repo: z.string().describe("The name of the repository to list commits for."), - query: z.string().describe("Search query to filter commits by message content (case-insensitive).").optional(), - since: z.string().describe("Show commits more recent than this date. Supports ISO 8601 or relative formats (e.g., '30 days ago').").optional(), - until: z.string().describe("Show commits older than this date. Supports ISO 8601 or relative formats (e.g., 'yesterday').").optional(), - author: z.string().describe("Filter commits by author name or email (case-insensitive).").optional(), - ref: z.string().describe("Commit SHA, branch or tag name to list commits of. If not provided, uses the default branch.").optional(), - page: z.number().int().positive().describe("Page number for pagination (min 1). Default: 1").optional().default(1), - perPage: z.number().int().positive().max(100).describe("Results per page for pagination (min 1, max 100). Default: 50").optional().default(50), - }), - }, - async ({ repo, query, since, until, author, ref, page, perPage }) => { - const skip = (page - 1) * perPage; - const result = await listCommits({ - repo, - query, - since, - until, - author, - ref, - maxCount: perPage, - skip, - }); - - if (isServiceError(result)) { - return { - content: [{ type: "text", text: `Failed to list commits: ${result.message}` }], - }; + description: dedent`Lists the available language models configured on the Sourcebot instance. Use this to discover which models can be specified when calling ask_codebase.`, + annotations: { + readOnlyHint: true, + idempotentHint: true, } - - return { content: [{ type: "text", text: JSON.stringify(result) }] }; - } - ); - - server.registerTool( - "list_repos", - { - description: TOOL_DESCRIPTIONS.list_repos, - annotations: { readOnlyHint: true }, - inputSchema: z.object({ - query: z.string().describe("Filter repositories by name (case-insensitive)").optional(), - page: z.number().int().positive().describe("Page number for pagination (min 1). Default: 1").optional().default(1), - perPage: z.number().int().positive().max(100).describe("Results per page for pagination (min 1, max 100). Default: 30").optional().default(30), - sort: z.enum(['name', 'pushed']).describe("Sort repositories by 'name' or 'pushed' (most recent commit). Default: 'name'").optional().default('name'), - direction: z.enum(['asc', 'desc']).describe("Sort direction: 'asc' or 'desc'. Default: 'asc'").optional().default('asc'), - }) }, - async ({ query, page, perPage, sort, direction }) => { - const result = await listRepos({ query, page, perPage, sort, direction, source: 'mcp' }); - - if (isServiceError(result)) { - return { - content: [{ type: "text", text: `Failed to list repositories: ${result.message}` }], - }; - } - - return { - content: [{ - type: "text", - text: JSON.stringify({ - repos: result.data.map((repo) => ({ - name: repo.repoName, - url: repo.webUrl, - pushedAt: repo.pushedAt, - defaultBranch: repo.defaultBranch, - isFork: repo.isFork, - isArchived: repo.isArchived, - })), - totalCount: result.totalCount, - }), - }], - }; - } - ); - - server.registerTool( - "read_file", - { - description: TOOL_DESCRIPTIONS.read_file, - annotations: { readOnlyHint: true }, - inputSchema: { - repo: z.string().describe("The repository name."), - path: z.string().describe("The path to the file."), - ref: z.string().optional().describe("Commit SHA, branch or tag name to fetch the source code for. If not provided, uses the default branch of the repository."), - }, - }, - async ({ repo, path, ref }) => { - const response = await getFileSource({ repo, path, ref }, { source: 'mcp' }); - - if (isServiceError(response)) { - return { - content: [{ type: "text", text: `Failed to read file: ${response.message}` }], - }; - } - - return { - content: [{ - type: "text", - text: JSON.stringify({ - source: response.source, - language: response.language, - path: response.path, - url: response.webUrl, - }), - }], - }; + async () => { + const models = await getConfiguredLanguageModelsInfo(); + return { content: [{ type: "text", text: JSON.stringify(models) }] }; } ); server.registerTool( - "list_tree", + "ask_codebase", { - description: TOOL_DESCRIPTIONS.list_tree, - annotations: { readOnlyHint: true }, - inputSchema: { - repo: z.string().describe("The name of the repository to list files from."), - path: z.string().describe("Directory path (relative to repo root). If omitted, the repo root is used.").optional().default(''), - ref: z.string().describe("Commit SHA, branch or tag name to list files from. If not provided, uses the default branch.").optional().default('HEAD'), - depth: z.number().int().positive().max(MAX_TREE_DEPTH).describe(`How many directory levels to traverse below \`path\` (min 1, max ${MAX_TREE_DEPTH}, default ${DEFAULT_TREE_DEPTH}).`).optional().default(DEFAULT_TREE_DEPTH), - includeFiles: z.boolean().describe("Whether to include files in the output (default: true).").optional().default(true), - includeDirectories: z.boolean().describe("Whether to include directories in the output (default: true).").optional().default(true), - maxEntries: z.number().int().positive().max(MAX_MAX_TREE_ENTRIES).describe(`Maximum number of entries to return (min 1, max ${MAX_MAX_TREE_ENTRIES}, default ${DEFAULT_MAX_TREE_ENTRIES}).`).optional().default(DEFAULT_MAX_TREE_ENTRIES), - }, - }, - async ({ - repo, - path = '', - ref = 'HEAD', - depth = DEFAULT_TREE_DEPTH, - includeFiles = true, - includeDirectories = true, - maxEntries = DEFAULT_MAX_TREE_ENTRIES, - }: { - repo: string; - path?: string; - ref?: string; - depth?: number; - includeFiles?: boolean; - includeDirectories?: boolean; - maxEntries?: number; - }) => { - const normalizedPath = normalizeTreePath(path); - const normalizedDepth = Math.min(depth, MAX_TREE_DEPTH); - const normalizedMaxEntries = Math.min(maxEntries, MAX_MAX_TREE_ENTRIES); - - if (!includeFiles && !includeDirectories) { - return { - content: [{ - type: "text", - text: JSON.stringify({ - repo, ref, path: normalizedPath, - entries: [] as ListTreeEntry[], - totalReturned: 0, - truncated: false, - }), - }], - }; - } - - const queue: Array<{ path: string; depth: number }> = [{ path: normalizedPath, depth: 0 }]; - const queuedPaths = new Set([normalizedPath]); - const seenEntries = new Set(); - const entries: ListTreeEntry[] = []; - let truncated = false; - let treeError: string | null = null; - - while (queue.length > 0 && !truncated) { - const currentDepth = queue[0]!.depth; - const currentLevelPaths: string[] = []; - - while (queue.length > 0 && queue[0]!.depth === currentDepth) { - currentLevelPaths.push(queue.shift()!.path); - } - - const treeResult = await getTree({ - repoName: repo, - revisionName: ref, - paths: currentLevelPaths.filter(Boolean), - }, { source: 'mcp' }); + description: dedent` + DO NOT USE THIS TOOL UNLESS EXPLICITLY ASKED TO. THE PROMPT MUST SPECIFICALLY ASK TO USE THE ask_codebase TOOL. - if (isServiceError(treeResult)) { - treeError = treeResult.message; - break; - } + Ask a natural language question about the codebase. This tool uses an AI agent to autonomously search code, read files, and find symbol references/definitions to answer your question. - const treeNodeIndex = buildTreeNodeIndex(treeResult.tree); + This is a blocking operation that may take 60+ seconds to research the codebase, so only invoke it if the user has explicitly asked you to by specifying the ask_codebase tool call in the prompt. - for (const currentPath of currentLevelPaths) { - const currentNode = currentPath === '' ? treeResult.tree : treeNodeIndex.get(currentPath); - if (!currentNode || currentNode.type !== 'tree') continue; + The agent will: + - Analyze your question and determine what context it needs + - Search the codebase using multiple strategies (code search, symbol lookup, file reading) + - Synthesize findings into a comprehensive answer with code references - for (const child of currentNode.children) { - if (child.type !== 'tree' && child.type !== 'blob') continue; + Returns a detailed answer in markdown format with code references, plus a link to view the full research session (including all tool calls and reasoning) in the Sourcebot web UI. - const childPath = joinTreePath(currentPath, child.name); - const childDepth = currentDepth + 1; - - if (child.type === 'tree' && childDepth < normalizedDepth && !queuedPaths.has(childPath)) { - queue.push({ path: childPath, depth: childDepth }); - queuedPaths.add(childPath); - } - - if ((child.type === 'blob' && !includeFiles) || (child.type === 'tree' && !includeDirectories)) { - continue; - } - - const key = `${child.type}:${childPath}`; - if (seenEntries.has(key)) continue; - seenEntries.add(key); - - if (entries.length >= normalizedMaxEntries) { - truncated = true; - break; - } - - entries.push({ - type: child.type as 'tree' | 'blob', - path: childPath, - name: child.name, - parentPath: currentPath, - depth: childDepth, - }); - } - - if (truncated) break; - } - } - - if (treeError) { - return { - content: [{ type: "text", text: `Failed to list tree: ${treeError}` }], - }; - } - - const sortedEntries = sortTreeEntries(entries); - return { - content: [{ - type: "text", - text: JSON.stringify({ - repo, ref, path: normalizedPath, - entries: sortedEntries, - totalReturned: sortedEntries.length, - truncated, - }), - }], - }; - } - ); - - server.registerTool( - "list_language_models", - { - description: TOOL_DESCRIPTIONS.list_language_models, - annotations: { readOnlyHint: true }, - }, - async () => { - const models = await getConfiguredLanguageModelsInfo(); - return { content: [{ type: "text", text: JSON.stringify(models) }] }; - } - ); - - server.registerTool( - "ask_codebase", - { - description: TOOL_DESCRIPTIONS.ask_codebase, - annotations: { readOnlyHint: true }, + When using this in shared environments (e.g., Slack), you can set the visibility parameter to 'PUBLIC' to ensure everyone can access the chat link. + `, inputSchema: z.object({ query: z.string().describe("The query to ask about the codebase."), repos: z.array(z.string()).optional().describe("The repositories accessible to the agent. If not provided, all repositories are accessible."), languageModel: languageModelInfoSchema.optional().describe("The language model to use. If not provided, defaults to the first model in the config."), visibility: z.enum(['PRIVATE', 'PUBLIC']).optional().describe("The visibility of the chat session. Defaults to PRIVATE for authenticated users."), }), + annotations: { + readOnlyHint: true, + } }, async (request) => { const result = await askCodebase({ diff --git a/packages/web/src/features/mcp/types.ts b/packages/web/src/features/mcp/types.ts index af60fd648..b3ff5d903 100644 --- a/packages/web/src/features/mcp/types.ts +++ b/packages/web/src/features/mcp/types.ts @@ -1,13 +1,7 @@ export type TextContent = { type: "text", text: string }; -export type ListTreeEntry = { - type: 'tree' | 'blob'; - path: string; - name: string; - parentPath: string; - depth: number; -}; +export type { ListTreeEntry } from "@/features/tools/listTree"; export type ListTreeApiNode = { type: 'tree' | 'blob'; diff --git a/packages/web/src/features/mcp/utils.ts b/packages/web/src/features/mcp/utils.ts index 96ef5d568..b6de4c71a 100644 --- a/packages/web/src/features/mcp/utils.ts +++ b/packages/web/src/features/mcp/utils.ts @@ -1,6 +1,6 @@ import { FileTreeNode } from "../git"; import { ServiceError } from "@/lib/serviceError"; -import { ListTreeEntry } from "./types"; +import { ListTreeEntry } from "@/features/tools/listTree"; export const isServiceError = (data: unknown): data is ServiceError => { return typeof data === 'object' && diff --git a/packages/web/src/features/tools/adapters.ts b/packages/web/src/features/tools/adapters.ts new file mode 100644 index 000000000..4dbf0fe07 --- /dev/null +++ b/packages/web/src/features/tools/adapters.ts @@ -0,0 +1,48 @@ +import { tool } from "ai"; +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { z } from "zod"; +import { ToolDefinition } from "./types"; + +export function toVercelAITool( + def: ToolDefinition, +) { + return tool({ + description: def.description, + inputSchema: def.inputSchema, + execute: (input) => def.execute(input, { source: 'sourcebot-ask-agent' }), + toModelOutput: ({ output }) => ({ + type: "content", + value: [{ type: "text", text: output.output }], + }), + }); +} + +export function registerMcpTool( + server: McpServer, + def: ToolDefinition, +) { + // Widening .shape to z.ZodRawShape (its base constraint) gives TypeScript a + // concrete InputArgs so it can fully resolve BaseToolCallback's conditional + // type. def.inputSchema.parse() recovers the correctly typed value inside. + server.registerTool( + def.name, + { + description: def.description, + inputSchema: def.inputSchema.shape as z.ZodRawShape, + annotations: { + readOnlyHint: def.isReadOnly, + idempotentHint: def.isIdempotent, + }, + }, + async (input) => { + try { + const parsed = def.inputSchema.parse(input); + const result = await def.execute(parsed, { source: 'mcp' }); + return { content: [{ type: "text" as const, text: result.output }] }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { content: [{ type: "text" as const, text: `Tool "${def.name}" failed: ${message}` }], isError: true }; + } + }, + ); +} diff --git a/packages/web/src/features/tools/findSymbolDefinitions.ts b/packages/web/src/features/tools/findSymbolDefinitions.ts new file mode 100644 index 000000000..1c97467fe --- /dev/null +++ b/packages/web/src/features/tools/findSymbolDefinitions.ts @@ -0,0 +1,62 @@ +import { z } from "zod"; +import { isServiceError } from "@/lib/utils"; +import { findSearchBasedSymbolDefinitions } from "@/features/codeNav/api"; +import { addLineNumbers } from "@/features/chat/utils"; +import { ToolDefinition } from "./types"; +import { FindSymbolFile } from "./findSymbolReferences"; +import { logger } from "./logger"; +import description from "./findSymbolDefinitions.txt"; + +const findSymbolDefinitionsShape = { + symbol: z.string().describe("The symbol to find definitions of"), + language: z.string().describe("The programming language of the symbol"), + repo: z.string().describe("The repository to scope the search to").optional(), +}; + +export type FindSymbolDefinitionsMetadata = { + files: FindSymbolFile[]; +}; + +export const findSymbolDefinitionsDefinition: ToolDefinition< + 'find_symbol_definitions', + typeof findSymbolDefinitionsShape, + FindSymbolDefinitionsMetadata +> = { + name: 'find_symbol_definitions', + isReadOnly: true, + isIdempotent: true, + description, + inputSchema: z.object(findSymbolDefinitionsShape), + execute: async ({ symbol, language, repo }, _context) => { + logger.debug('find_symbol_definitions', { symbol, language, repo }); + const revision = "HEAD"; + + const response = await findSearchBasedSymbolDefinitions({ + symbolName: symbol, + language, + revisionName: revision, + repoName: repo, + }); + + if (isServiceError(response)) { + throw new Error(response.message); + } + + const metadata: FindSymbolDefinitionsMetadata = { + files: response.files.map((file) => ({ + fileName: file.fileName, + repo: file.repository, + language: file.language, + matches: file.matches.map(({ lineContent, range }) => { + return addLineNumbers(lineContent, range.start.lineNumber); + }), + revision, + })), + }; + + return { + output: JSON.stringify(metadata), + metadata, + }; + }, +}; diff --git a/packages/web/src/features/tools/findSymbolDefinitions.txt b/packages/web/src/features/tools/findSymbolDefinitions.txt new file mode 100644 index 000000000..0ba87ff08 --- /dev/null +++ b/packages/web/src/features/tools/findSymbolDefinitions.txt @@ -0,0 +1 @@ +Finds definitions of a symbol in the codebase. diff --git a/packages/web/src/features/tools/findSymbolReferences.ts b/packages/web/src/features/tools/findSymbolReferences.ts new file mode 100644 index 000000000..a1a2f0bec --- /dev/null +++ b/packages/web/src/features/tools/findSymbolReferences.ts @@ -0,0 +1,69 @@ +import { z } from "zod"; +import { isServiceError } from "@/lib/utils"; +import { findSearchBasedSymbolReferences } from "@/features/codeNav/api"; +import { addLineNumbers } from "@/features/chat/utils"; +import { ToolDefinition } from "./types"; +import { logger } from "./logger"; +import description from "./findSymbolReferences.txt"; + +const findSymbolReferencesShape = { + symbol: z.string().describe("The symbol to find references to"), + language: z.string().describe("The programming language of the symbol"), + repo: z.string().describe("The repository to scope the search to").optional(), +}; + +export type FindSymbolFile = { + fileName: string; + repo: string; + language: string; + matches: string[]; + revision: string; +}; + +export type FindSymbolReferencesMetadata = { + files: FindSymbolFile[]; +}; + +export const findSymbolReferencesDefinition: ToolDefinition< + 'find_symbol_references', + typeof findSymbolReferencesShape, + FindSymbolReferencesMetadata +> = { + name: 'find_symbol_references', + isReadOnly: true, + isIdempotent: true, + description, + inputSchema: z.object(findSymbolReferencesShape), + execute: async ({ symbol, language, repo }, _context) => { + logger.debug('find_symbol_references', { symbol, language, repo }); + const revision = "HEAD"; + + const response = await findSearchBasedSymbolReferences({ + symbolName: symbol, + language, + revisionName: revision, + repoName: repo, + }); + + if (isServiceError(response)) { + throw new Error(response.message); + } + + const metadata: FindSymbolReferencesMetadata = { + files: response.files.map((file) => ({ + fileName: file.fileName, + repo: file.repository, + language: file.language, + matches: file.matches.map(({ lineContent, range }) => { + return addLineNumbers(lineContent, range.start.lineNumber); + }), + revision, + })), + }; + + return { + output: JSON.stringify(metadata), + metadata, + }; + }, +}; diff --git a/packages/web/src/features/tools/findSymbolReferences.txt b/packages/web/src/features/tools/findSymbolReferences.txt new file mode 100644 index 000000000..e35a2c87b --- /dev/null +++ b/packages/web/src/features/tools/findSymbolReferences.txt @@ -0,0 +1 @@ +Finds references to a symbol in the codebase. diff --git a/packages/web/src/features/tools/grep.ts b/packages/web/src/features/tools/grep.ts new file mode 100644 index 000000000..e9288a05b --- /dev/null +++ b/packages/web/src/features/tools/grep.ts @@ -0,0 +1,158 @@ +import { z } from "zod"; +import globToRegexp from "glob-to-regexp"; +import { isServiceError } from "@/lib/utils"; +import { search } from "@/features/search"; +import escapeStringRegexp from "escape-string-regexp"; +import { ToolDefinition } from "./types"; +import { logger } from "./logger"; +import description from "./grep.txt"; + +const DEFAULT_SEARCH_LIMIT = 100; +const MAX_LINE_LENGTH = 2000; +const MAX_LINE_SUFFIX = `... (line truncated to ${MAX_LINE_LENGTH} chars)`; + +function globToFileRegexp(glob: string): string { + const re = globToRegexp(glob, { extended: true, globstar: true }); + return re.source.replace(/^\^/, ''); +} + +const grepShape = { + pattern: z + .string() + .describe(`The regex pattern to search for in file contents`), + path: z + .string() + .describe(`The directory to search in. Defaults to the repository root.`) + .optional(), + include: z + .string() + .describe(`File pattern to include in the search (e.g. "*.js", "*.{ts,tsx}")`) + .optional(), + repo: z + .string() + .describe(`The name of the repository to search in. If not provided, searches all repositories.`) + .optional(), + ref: z + .string() + .describe(`The commit SHA, branch or tag name to search on. If not provided, defaults to the default branch (usually 'main' or 'master').`) + .optional(), + limit: z + .number() + .default(DEFAULT_SEARCH_LIMIT) + .describe(`The maximum number of matches to return (default: ${DEFAULT_SEARCH_LIMIT})`) + .optional(), +}; + +export type GrepFile = { + path: string; + name: string; + repo: string; + revision: string; +}; + +export type GrepMetadata = { + files: GrepFile[]; + query: string; +}; + +export const grepDefinition: ToolDefinition<'grep', typeof grepShape, GrepMetadata> = { + name: 'grep', + isReadOnly: true, + isIdempotent: true, + description, + inputSchema: z.object(grepShape), + execute: async ({ + pattern, + path, + include, + repo, + ref, + limit = DEFAULT_SEARCH_LIMIT, + }, context) => { + logger.debug('grep', { pattern, path, include, repo, ref, limit }); + + const quotedPattern = `"${pattern.replace(/"/g, '\\"')}"`; + let query = quotedPattern; + + if (path) { + query += ` file:${escapeStringRegexp(path)}`; + } + + if (include) { + query += ` file:${globToFileRegexp(include)}`; + } + + if (repo) { + query += ` repo:${escapeStringRegexp(repo)}`; + } + + if (ref) { + query += ` (rev:${ref})`; + } + + const response = await search({ + queryType: 'string', + query, + options: { + matches: limit, + contextLines: 0, + isCaseSensitivityEnabled: true, + isRegexEnabled: true, + }, + source: context.source, + }); + + if (isServiceError(response)) { + throw new Error(response.message); + } + + const metadata: GrepMetadata = { + files: response.files.map((file) => ({ + path: file.fileName.text, + name: file.fileName.text.split('/').pop() ?? file.fileName.text, + repo: file.repository, + revision: ref ?? 'HEAD', + } satisfies GrepFile)), + query, + }; + + const totalFiles = response.files.length; + const actualMatches = response.stats.actualMatchCount; + + if (totalFiles === 0) { + return { + output: 'No files found', + metadata, + }; + } + + const outputLines: string[] = [ + `Found ${actualMatches} match${actualMatches !== 1 ? 'es' : ''} in ${totalFiles} file${totalFiles !== 1 ? 's' : ''}`, + ]; + + for (const file of response.files) { + outputLines.push(''); + outputLines.push(`[${file.repository}] ${file.fileName.text}:`); + for (const chunk of file.chunks) { + chunk.content.split('\n').forEach((content, i) => { + if (!content.trim()) return; + const lineNum = chunk.contentStart.lineNumber + i; + const line = content.length > MAX_LINE_LENGTH + ? content.substring(0, MAX_LINE_LENGTH) + MAX_LINE_SUFFIX + : content; + outputLines.push(` ${lineNum}: ${line}`); + }); + } + } + + if (!response.isSearchExhaustive) { + outputLines.push(''); + outputLines.push(`(Results truncated. Consider using a more specific path or pattern, specifying a repo, or increasing the limit.)`); + } + + return { + output: outputLines.join('\n'), + metadata, + }; + }, +}; diff --git a/packages/web/src/features/tools/grep.txt b/packages/web/src/features/tools/grep.txt new file mode 100644 index 000000000..dccdac441 --- /dev/null +++ b/packages/web/src/features/tools/grep.txt @@ -0,0 +1,7 @@ +- Fast content search tool that works with any codebase size +- Searches file contents using regular expressions +- Supports full regex syntax (eg. "log.*Error", "function\s+\w+", etc.) +- Filter files by pattern with the include parameter (eg. "*.js", "*.{ts,tsx}") +- Returns file paths and line numbers with at least one match +- Use this tool when you need to find files containing specific patterns +- When using the `repo` param, if the repository name is not known, use `list_repos` first to discover the correct name. diff --git a/packages/web/src/features/tools/index.ts b/packages/web/src/features/tools/index.ts new file mode 100644 index 000000000..49a1a1b2d --- /dev/null +++ b/packages/web/src/features/tools/index.ts @@ -0,0 +1,8 @@ +export * from './readFile'; +export * from './listCommits'; +export * from './listRepos'; +export * from './grep'; +export * from './findSymbolReferences'; +export * from './findSymbolDefinitions'; +export * from './listTree'; +export * from './adapters'; \ No newline at end of file diff --git a/packages/web/src/features/tools/listCommits.ts b/packages/web/src/features/tools/listCommits.ts new file mode 100644 index 000000000..c40ff1137 --- /dev/null +++ b/packages/web/src/features/tools/listCommits.ts @@ -0,0 +1,53 @@ +import { z } from "zod"; +import { isServiceError } from "@/lib/utils"; +import { listCommits, SearchCommitsResult } from "@/features/git"; +import { ToolDefinition } from "./types"; +import { logger } from "./logger"; +import description from "./listCommits.txt"; + +const listCommitsShape = { + repo: z.string().describe("The repository to list commits from"), + query: z.string().describe("Search query to filter commits by message (case-insensitive)").optional(), + since: z.string().describe("Start date for commit range (e.g., '30 days ago', '2024-01-01', 'last week')").optional(), + until: z.string().describe("End date for commit range (e.g., 'yesterday', '2024-12-31', 'today')").optional(), + author: z.string().describe("Filter commits by author name or email (case-insensitive)").optional(), + ref: z.string().describe("Commit SHA, branch or tag name to list commits of. If not provided, uses the default branch.").optional(), + page: z.number().int().positive().describe("Page number for pagination (min 1). Default: 1").optional().default(1), + perPage: z.number().int().positive().max(100).describe("Results per page for pagination (min 1, max 100). Default: 50").optional().default(50), +}; + +export type ListCommitsMetadata = SearchCommitsResult; + +export const listCommitsDefinition: ToolDefinition<"list_commits", typeof listCommitsShape, ListCommitsMetadata> = { + name: "list_commits", + isReadOnly: true, + isIdempotent: true, + description, + inputSchema: z.object(listCommitsShape), + execute: async (params, _context) => { + logger.debug('list_commits', params); + + const { repo, query, since, until, author, ref, page, perPage } = params; + const skip = (page - 1) * perPage; + + const response = await listCommits({ + repo, + query, + since, + until, + author, + ref, + maxCount: perPage, + skip, + }); + + if (isServiceError(response)) { + throw new Error(response.message); + } + + return { + output: JSON.stringify(response), + metadata: response, + }; + }, +}; diff --git a/packages/web/src/features/tools/listCommits.txt b/packages/web/src/features/tools/listCommits.txt new file mode 100644 index 000000000..b82afe97a --- /dev/null +++ b/packages/web/src/features/tools/listCommits.txt @@ -0,0 +1 @@ +Lists commits in a repository with optional filtering by date range, author, and commit message. diff --git a/packages/web/src/features/tools/listRepos.ts b/packages/web/src/features/tools/listRepos.ts new file mode 100644 index 000000000..70b731096 --- /dev/null +++ b/packages/web/src/features/tools/listRepos.ts @@ -0,0 +1,72 @@ +import { z } from "zod"; +import { isServiceError } from "@/lib/utils"; +import { listRepos } from "@/app/api/(server)/repos/listReposApi"; +import { ToolDefinition } from "./types"; +import { logger } from "./logger"; +import description from './listRepos.txt'; + +const listReposShape = { + query: z.string().describe("Filter repositories by name (case-insensitive)").optional(), + page: z.number().int().positive().describe("Page number for pagination (min 1). Default: 1").optional().default(1), + perPage: z.number().int().positive().max(100).describe("Results per page for pagination (min 1, max 100). Default: 30").optional().default(30), + sort: z.enum(['name', 'pushed']).describe("Sort repositories by 'name' or 'pushed' (most recent commit). Default: 'name'").optional().default('name'), + direction: z.enum(['asc', 'desc']).describe("Sort direction: 'asc' or 'desc'. Default: 'asc'").optional().default('asc'), +}; + +export type ListRepo = { + name: string; + url: string | null; + pushedAt: string | null; + defaultBranch: string | null; + isFork: boolean; + isArchived: boolean; +}; + +export type ListReposMetadata = { + repos: ListRepo[]; + totalCount: number; +}; + +export const listReposDefinition: ToolDefinition< + 'list_repos', + typeof listReposShape, + ListReposMetadata +> = { + name: 'list_repos', + isReadOnly: true, + isIdempotent: true, + description, + inputSchema: z.object(listReposShape), + execute: async ({ page, perPage, sort, direction, query }, context) => { + logger.debug('list_repos', { page, perPage, sort, direction, query }); + const reposResponse = await listRepos({ + page, + perPage, + sort, + direction, + query, + source: context.source, + }); + + if (isServiceError(reposResponse)) { + throw new Error(reposResponse.message); + } + + const metadata: ListReposMetadata = { + repos: reposResponse.data.map((repo) => ({ + name: repo.repoName, + url: repo.webUrl ?? null, + pushedAt: repo.pushedAt?.toISOString() ?? null, + defaultBranch: repo.defaultBranch ?? null, + isFork: repo.isFork, + isArchived: repo.isArchived, + })), + totalCount: reposResponse.totalCount, + }; + + return { + output: JSON.stringify(metadata), + metadata, + }; + }, +}; diff --git a/packages/web/src/features/tools/listRepos.txt b/packages/web/src/features/tools/listRepos.txt new file mode 100644 index 000000000..343546d27 --- /dev/null +++ b/packages/web/src/features/tools/listRepos.txt @@ -0,0 +1 @@ +Lists repositories in the organization with optional filtering and pagination. diff --git a/packages/web/src/features/tools/listTree.ts b/packages/web/src/features/tools/listTree.ts new file mode 100644 index 000000000..7e774f516 --- /dev/null +++ b/packages/web/src/features/tools/listTree.ts @@ -0,0 +1,174 @@ +import { z } from "zod"; +import { isServiceError } from "@/lib/utils"; +import { getTree } from "@/features/git"; +import { buildTreeNodeIndex, joinTreePath, normalizeTreePath, sortTreeEntries } from "@/features/mcp/utils"; +import { ToolDefinition } from "./types"; +import { logger } from "./logger"; +import description from "./listTree.txt"; + +const DEFAULT_TREE_DEPTH = 1; +const MAX_TREE_DEPTH = 10; +const DEFAULT_MAX_TREE_ENTRIES = 1000; +const MAX_MAX_TREE_ENTRIES = 10000; + +const listTreeShape = { + repo: z.string().describe("The name of the repository to list files from."), + path: z.string().describe("Directory path (relative to repo root). If omitted, the repo root is used.").optional().default(''), + ref: z.string().describe("Commit SHA, branch or tag name to list files from. If not provided, uses the default branch.").optional().default('HEAD'), + depth: z.number().int().positive().max(MAX_TREE_DEPTH).describe(`How many directory levels to traverse below \`path\` (min 1, max ${MAX_TREE_DEPTH}, default ${DEFAULT_TREE_DEPTH}).`).optional().default(DEFAULT_TREE_DEPTH), + includeFiles: z.boolean().describe("Whether to include files in the output (default: true).").optional().default(true), + includeDirectories: z.boolean().describe("Whether to include directories in the output (default: true).").optional().default(true), + maxEntries: z.number().int().positive().max(MAX_MAX_TREE_ENTRIES).describe(`Maximum number of entries to return (min 1, max ${MAX_MAX_TREE_ENTRIES}, default ${DEFAULT_MAX_TREE_ENTRIES}).`).optional().default(DEFAULT_MAX_TREE_ENTRIES), +}; + +export type ListTreeEntry = { + type: 'tree' | 'blob'; + path: string; + name: string; + parentPath: string; + depth: number; +}; + +export type ListTreeMetadata = { + repo: string; + ref: string; + path: string; + entries: ListTreeEntry[]; + totalReturned: number; + truncated: boolean; +}; + +export const listTreeDefinition: ToolDefinition<'list_tree', typeof listTreeShape, ListTreeMetadata> = { + name: 'list_tree', + isReadOnly: true, + isIdempotent: true, + description, + inputSchema: z.object(listTreeShape), + execute: async ({ repo, path = '', ref = 'HEAD', depth = DEFAULT_TREE_DEPTH, includeFiles = true, includeDirectories = true, maxEntries = DEFAULT_MAX_TREE_ENTRIES }, context) => { + logger.debug('list_tree', { repo, path, ref, depth, includeFiles, includeDirectories, maxEntries }); + const normalizedPath = normalizeTreePath(path); + const normalizedDepth = Math.min(depth, MAX_TREE_DEPTH); + const normalizedMaxEntries = Math.min(maxEntries, MAX_MAX_TREE_ENTRIES); + + if (!includeFiles && !includeDirectories) { + const metadata: ListTreeMetadata = { + repo, + ref, + path: normalizedPath, + entries: [], + totalReturned: 0, + truncated: false, + }; + return { output: 'No entries found', metadata }; + } + + const queue: Array<{ path: string; depth: number }> = [{ path: normalizedPath, depth: 0 }]; + const queuedPaths = new Set([normalizedPath]); + const seenEntries = new Set(); + const entries: ListTreeEntry[] = []; + let truncated = false; + + while (queue.length > 0 && !truncated) { + const currentDepth = queue[0]!.depth; + const currentLevelPaths: string[] = []; + + while (queue.length > 0 && queue[0]!.depth === currentDepth) { + currentLevelPaths.push(queue.shift()!.path); + } + + const treeResult = await getTree({ + repoName: repo, + revisionName: ref, + paths: currentLevelPaths.filter(Boolean), + }, { source: context.source }); + + if (isServiceError(treeResult)) { + throw new Error(treeResult.message); + } + + const treeNodeIndex = buildTreeNodeIndex(treeResult.tree); + + for (const currentPath of currentLevelPaths) { + const currentNode = currentPath === '' ? treeResult.tree : treeNodeIndex.get(currentPath); + if (!currentNode || currentNode.type !== 'tree') continue; + + for (const child of currentNode.children) { + if (child.type !== 'tree' && child.type !== 'blob') continue; + + const childPath = joinTreePath(currentPath, child.name); + const childDepth = currentDepth + 1; + + if (child.type === 'tree' && childDepth < normalizedDepth && !queuedPaths.has(childPath)) { + queue.push({ path: childPath, depth: childDepth }); + queuedPaths.add(childPath); + } + + if ((child.type === 'blob' && !includeFiles) || (child.type === 'tree' && !includeDirectories)) { + continue; + } + + const key = `${child.type}:${childPath}`; + if (seenEntries.has(key)) continue; + seenEntries.add(key); + + if (entries.length >= normalizedMaxEntries) { + truncated = true; + break; + } + + entries.push({ + type: child.type as 'tree' | 'blob', + path: childPath, + name: child.name, + parentPath: currentPath, + depth: childDepth, + }); + } + + if (truncated) break; + } + } + + const sortedEntries = sortTreeEntries(entries); + const metadata: ListTreeMetadata = { + repo, ref, path: normalizedPath, + entries: sortedEntries, + totalReturned: sortedEntries.length, + truncated, + }; + + const outputLines = [normalizedPath || '/']; + + const childrenByPath = new Map(); + for (const entry of sortedEntries) { + const siblings = childrenByPath.get(entry.parentPath) ?? []; + siblings.push(entry); + childrenByPath.set(entry.parentPath, siblings); + } + + function renderEntries(parentPath: string) { + const children = childrenByPath.get(parentPath) ?? []; + for (const entry of children) { + const indent = ' '.repeat(entry.depth); + const label = entry.type === 'tree' ? `${entry.name}/` : entry.name; + outputLines.push(`${indent}${label}`); + if (entry.type === 'tree') { + renderEntries(entry.path); + } + } + } + + renderEntries(normalizedPath); + + if (sortedEntries.length === 0) { + outputLines.push(' (no entries found)'); + } + + if (truncated) { + outputLines.push(''); + outputLines.push(`(truncated — showing first ${normalizedMaxEntries} entries)`); + } + + return { output: outputLines.join('\n'), metadata }; + }, +}; diff --git a/packages/web/src/features/tools/listTree.txt b/packages/web/src/features/tools/listTree.txt new file mode 100644 index 000000000..3737ddfd9 --- /dev/null +++ b/packages/web/src/features/tools/listTree.txt @@ -0,0 +1,9 @@ +Lists files and directories from a repository path. This can be used as a repo tree tool or directory listing tool. Returns a flat list of entries with path metadata and depth relative to the requested path. + +Usage: +- If the repository name is not known, use `list_repos` first to discover the correct name. +- Start with a shallow depth (default: 1) to get a high-level overview, then drill into specific subdirectories as needed. +- Use `path` to scope the listing to a subdirectory rather than fetching the entire tree at once. +- Set `includeFiles: false` to list only directories when you only need the directory structure. +- Set `includeDirectories: false` to list only files when you only need leaf nodes. +- Call this tool in parallel when you need to explore multiple directories simultaneously. diff --git a/packages/web/src/features/tools/logger.ts b/packages/web/src/features/tools/logger.ts new file mode 100644 index 000000000..2d1bb7dbe --- /dev/null +++ b/packages/web/src/features/tools/logger.ts @@ -0,0 +1,3 @@ +import { createLogger } from "@sourcebot/shared"; + +export const logger = createLogger('tool'); diff --git a/packages/web/src/features/tools/readFile.ts b/packages/web/src/features/tools/readFile.ts new file mode 100644 index 000000000..9cf064dd4 --- /dev/null +++ b/packages/web/src/features/tools/readFile.ts @@ -0,0 +1,111 @@ +import { z } from "zod"; +import { isServiceError } from "@/lib/utils"; +import { getFileSource } from "@/features/git"; +import { ToolDefinition } from "./types"; +import { logger } from "./logger"; +import description from "./readFile.txt"; + +// NOTE: if you change these values, update readFile.txt to match. +const READ_FILES_MAX_LINES = 500; +const MAX_LINE_LENGTH = 2000; +const MAX_LINE_SUFFIX = `... (line truncated to ${MAX_LINE_LENGTH} chars)`; +const MAX_BYTES = 5 * 1024; +const MAX_BYTES_LABEL = `${MAX_BYTES / 1024}KB`; + +const readFileShape = { + path: z.string().describe("The path to the file"), + repo: z.string().describe("The repository to read the file from"), + offset: z.number().int().positive() + .optional() + .describe("Line number to start reading from (1-indexed). Omit to start from the beginning."), + limit: z.number().int().positive() + .optional() + .describe(`Maximum number of lines to read (max: ${READ_FILES_MAX_LINES}). Omit to read up to ${READ_FILES_MAX_LINES} lines.`), +}; + +export type ReadFileMetadata = { + path: string; + repo: string; + language: string; + startLine: number; + endLine: number; + isTruncated: boolean; + revision: string; +}; + +export const readFileDefinition: ToolDefinition<"read_file", typeof readFileShape, ReadFileMetadata> = { + name: "read_file", + isReadOnly: true, + isIdempotent: true, + description, + inputSchema: z.object(readFileShape), + execute: async ({ path, repo, offset, limit }, context) => { + logger.debug('read_file', { path, repo, offset, limit }); + // @todo: make revision configurable. + const revision = "HEAD"; + + const fileSource = await getFileSource({ + path, + repo, + ref: revision, + }, { source: context.source }); + + if (isServiceError(fileSource)) { + throw new Error(fileSource.message); + } + + const lines = fileSource.source.split('\n'); + const start = (offset ?? 1) - 1; + const end = start + Math.min(limit ?? READ_FILES_MAX_LINES, READ_FILES_MAX_LINES); + + let bytes = 0; + let truncatedByBytes = false; + const slicedLines: string[] = []; + for (const raw of lines.slice(start, end)) { + const line = raw.length > MAX_LINE_LENGTH ? raw.substring(0, MAX_LINE_LENGTH) + MAX_LINE_SUFFIX : raw; + const size = Buffer.byteLength(line, 'utf-8') + (slicedLines.length > 0 ? 1 : 0); + if (bytes + size > MAX_BYTES) { + truncatedByBytes = true; + break; + } + slicedLines.push(line); + bytes += size; + } + + const truncatedByLines = end < lines.length; + const startLine = (offset ?? 1); + const lastReadLine = startLine + slicedLines.length - 1; + const nextOffset = lastReadLine + 1; + + let output = [ + `${fileSource.repo}`, + `${fileSource.path}`, + `${fileSource.webUrl}`, + '\n' + ].join('\n'); + + output += slicedLines.map((line, i) => `${startLine + i}: ${line}`).join('\n'); + + if (truncatedByBytes) { + output += `\n\n(Output capped at ${MAX_BYTES_LABEL}. Showing lines ${startLine}-${lastReadLine} of ${lines.length}. Use offset=${nextOffset} to continue.)`; + } else if (truncatedByLines) { + output += `\n\n(Showing lines ${startLine}-${lastReadLine} of ${lines.length}. Use offset=${nextOffset} to continue.)`; + } else { + output += `\n\n(End of file - ${lines.length} lines total)`; + } + + output += `\n`; + + const metadata: ReadFileMetadata = { + path: fileSource.path, + repo: fileSource.repo, + language: fileSource.language, + startLine, + endLine: lastReadLine, + isTruncated: truncatedByBytes || truncatedByLines, + revision, + }; + + return { output, metadata }; + }, +}; diff --git a/packages/web/src/features/tools/readFile.txt b/packages/web/src/features/tools/readFile.txt new file mode 100644 index 000000000..9e1590bb6 --- /dev/null +++ b/packages/web/src/features/tools/readFile.txt @@ -0,0 +1,9 @@ +Read the contents of a file in a repository. + +Usage: +- Use offset/limit to read a specific portion of a file, which is strongly preferred for large files when only a specific section is needed. +- Maximum 500 lines per call. Output is also capped at 5KB — if the cap is hit, call again with a larger offset to continue reading. +- Any line longer than 2000 characters is truncated. +- The response content includes the line range read and total line count. If the output was truncated, the next offset to continue reading is also included. +- Call this tool in parallel when you need to read multiple files simultaneously. +- Avoid tiny repeated slices. If you need more context, read a larger window. diff --git a/packages/web/src/features/tools/types.ts b/packages/web/src/features/tools/types.ts new file mode 100644 index 000000000..9e580bbd3 --- /dev/null +++ b/packages/web/src/features/tools/types.ts @@ -0,0 +1,23 @@ +import { z } from "zod"; + +export interface ToolContext { + source?: string; +} + +export interface ToolDefinition< + TName extends string, + TShape extends z.ZodRawShape, + TMetadata = Record, +> { + name: TName; + description: string; + inputSchema: z.ZodObject; + isReadOnly: boolean; + isIdempotent: boolean; + execute: (input: z.infer>, context: ToolContext) => Promise>; +} + +export interface ToolResult> { + output: string; + metadata: TMetadata; +} diff --git a/packages/web/src/lib/utils.ts b/packages/web/src/lib/utils.ts index dd7f783e5..d61832326 100644 --- a/packages/web/src/lib/utils.ts +++ b/packages/web/src/lib/utils.ts @@ -507,13 +507,13 @@ export const getFormattedDate = (date: Date) => { /** * Converts a number to a string */ -export const getShortenedNumberDisplayString = (number: number) => { +export const getShortenedNumberDisplayString = (number: number, fractionDigits: number = 1) => { if (number < 1000) { return number.toString(); } else if (number < 1000000) { - return `${(number / 1000).toFixed(1)}k`; + return `${(number / 1000).toFixed(fractionDigits)}k`; } else { - return `${(number / 1000000).toFixed(1)}m`; + return `${(number / 1000000).toFixed(fractionDigits)}m`; } } diff --git a/packages/web/tools/globToRegexpPlayground.ts b/packages/web/tools/globToRegexpPlayground.ts new file mode 100644 index 000000000..fc915b55b --- /dev/null +++ b/packages/web/tools/globToRegexpPlayground.ts @@ -0,0 +1,111 @@ +import globToRegexp from 'glob-to-regexp'; +import escapeStringRegexp from 'escape-string-regexp'; + +// ------------------------------------------------------- +// Playground for building Sourcebot/zoekt search queries +// from grep-style (pattern, path, include) inputs. +// +// Run with: yarn workspace @sourcebot/web tsx tools/globToRegexpPlayground.ts +// ------------------------------------------------------- + +interface SearchInput { + pattern: string; // content search term or regex + path?: string; // directory prefix, e.g. "packages/web/src" + include?: string; // glob for filenames, e.g. "*.ts" or "**/*.{ts,tsx}" +} + +function globToFileRegexp(glob: string): string { + const re = globToRegexp(glob, { extended: true, globstar: true }); + // Strip ^ anchor — Sourcebot file paths include the full repo-relative path, + // so the pattern shouldn't be anchored to the start. + return re.source.replace(/^\^/, ''); +} + +function buildRipgrepCommand({ pattern, path, include }: SearchInput): string { + const parts = ['rg', `"${pattern.replace(/"/g, '\\"')}"`]; + if (path) parts.push(path); + if (include) parts.push(`--glob "${include}"`); + return parts.join(' '); +} + +function buildZoektQuery({ pattern, path, include }: SearchInput): string { + const parts: string[] = [`"${pattern.replace(/"/g, '\\"')}"`]; + + if (path) { + parts.push(`file:${escapeStringRegexp(path)}`); + } + + if (include) { + parts.push(`file:${globToFileRegexp(include)}`); + } + + return parts.join(' '); +} + +// ------------------------------------------------------- +// Examples +// ------------------------------------------------------- + +const examples: SearchInput[] = [ + // Broad content search, no file scoping + { pattern: 'isServiceError' }, + + // Scoped to a directory + { pattern: 'isServiceError', path: 'packages/web/src' }, + + // Scoped to a file type + { pattern: 'isServiceError', include: '*.ts' }, + + // Scoped to both + { pattern: 'isServiceError', path: 'packages/web/src', include: '*.ts' }, + + // Multiple extensions via glob + { pattern: 'useQuery', include: '**/*.{ts,tsx}' }, + + // Test files only + { pattern: 'expect\\(', include: '*.test.ts' }, + + // Specific subdirectory + extension + { pattern: 'withAuthV2', path: 'packages/web/src/app', include: '**/*.ts' }, + + // Next.js route group — parens in path are regex special chars + { pattern: 'withAuthV2', path: 'packages/web/src/app/api/(server)', include: '**/*.ts' }, + + // Next.js dynamic segment — brackets in path are regex special chars + { pattern: 'withOptionalAuthV2', path: 'packages/web/src/app/[domain]', include: '**/*.ts' }, + + // Pattern with spaces — must be quoted in zoekt query + { pattern: 'Starting scheduler', include: '**/*.ts' }, + + // Literal phrase in a txt file + { pattern: String.raw`"hello world"`, include: '**/*.txt' }, + + // Pattern with a quote character + { pattern: 'from "@/lib', include: '**/*.ts' }, + + // Pattern with a backslash — needs double-escaping in zoekt quoted terms + { pattern: String.raw`C:\\\\Windows\\\\System32`, include: '**/*.ts' }, +]; + +function truncate(str: string, width: number): string { + return str.length > width ? str.slice(0, width - 3) + '...' : str.padEnd(width); +} + +const col1 = 70; +const col2 = 75; +console.log(truncate('input', col1) + truncate('ripgrep', col2) + 'zoekt query'); +console.log('-'.repeat(col1 + col2 + 50)); + +function prettyPrint(example: SearchInput): string { + const fields = Object.entries(example) + .map(([k, v]) => `${k}: '${v}'`) + .join(', '); + return `{ ${fields} }`; +} + +for (const example of examples) { + const input = prettyPrint(example); + const rg = buildRipgrepCommand(example); + const zoekt = buildZoektQuery(example); + console.log(truncate(input, col1) + rg.padEnd(col2) + zoekt); +} diff --git a/packages/web/types.d.ts b/packages/web/types.d.ts new file mode 100644 index 000000000..bceb5175d --- /dev/null +++ b/packages/web/types.d.ts @@ -0,0 +1,4 @@ +declare module '*.txt' { + const content: string; + export default content; +} diff --git a/yarn.lock b/yarn.lock index 4a2dfdb3f..ce65eec17 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8949,6 +8949,7 @@ __metadata: "@tanstack/react-virtual": "npm:^3.10.8" "@testing-library/dom": "npm:^10.4.1" "@testing-library/react": "npm:^16.3.0" + "@types/glob-to-regexp": "npm:^0.4.4" "@types/micromatch": "npm:^4.0.9" "@types/node": "npm:^20" "@types/nodemailer": "npm:^6.4.17" @@ -8998,6 +8999,7 @@ __metadata: eslint-plugin-react-hooks: "npm:^7.0.1" fast-deep-equal: "npm:^3.1.3" fuse.js: "npm:^7.0.0" + glob-to-regexp: "npm:^0.4.1" google-auth-library: "npm:^10.1.0" graphql: "npm:^16.9.0" http-status-codes: "npm:^2.3.0" @@ -9022,6 +9024,7 @@ __metadata: posthog-node: "npm:^5.24.15" pretty-bytes: "npm:^6.1.1" psl: "npm:^1.15.0" + raw-loader: "npm:^4.0.2" react: "npm:19.2.4" react-device-detect: "npm:^2.2.3" react-dom: "npm:19.2.4" @@ -9462,6 +9465,13 @@ __metadata: languageName: node linkType: hard +"@types/glob-to-regexp@npm:^0.4.4": + version: 0.4.4 + resolution: "@types/glob-to-regexp@npm:0.4.4" + checksum: 10c0/7288ff853850d8302a8770a3698b187fc3970ad12ee6427f0b3758a3e7a0ebb0bd993abc6ebaaa979d09695b4194157d2bfaa7601b0fb9ed72c688b4c1298b88 + languageName: node + linkType: hard + "@types/hast@npm:^3.0.0, @types/hast@npm:^3.0.4": version: 3.0.4 resolution: "@types/hast@npm:3.0.4" @@ -9485,7 +9495,7 @@ __metadata: languageName: node linkType: hard -"@types/json-schema@npm:^7.0.15": +"@types/json-schema@npm:^7.0.15, @types/json-schema@npm:^7.0.8": version: 7.0.15 resolution: "@types/json-schema@npm:7.0.15" checksum: 10c0/a996a745e6c5d60292f36731dd41341339d4eeed8180bb09226e5c8d23759067692b1d88e5d91d72ee83dfc00d3aca8e7bd43ea120516c17922cbcb7c3e252db @@ -10528,6 +10538,15 @@ __metadata: languageName: node linkType: hard +"ajv-keywords@npm:^3.5.2": + version: 3.5.2 + resolution: "ajv-keywords@npm:3.5.2" + peerDependencies: + ajv: ^6.9.1 + checksum: 10c0/0c57a47cbd656e8cdfd99d7c2264de5868918ffa207c8d7a72a7f63379d4333254b2ba03d69e3c035e996a3fd3eb6d5725d7a1597cca10694296e32510546360 + languageName: node + linkType: hard + "ajv@npm:^6.12.4": version: 6.12.6 resolution: "ajv@npm:6.12.6" @@ -10540,7 +10559,7 @@ __metadata: languageName: node linkType: hard -"ajv@npm:^6.14.0": +"ajv@npm:^6.12.5, ajv@npm:^6.14.0": version: 6.14.0 resolution: "ajv@npm:6.14.0" dependencies: @@ -10953,6 +10972,13 @@ __metadata: languageName: node linkType: hard +"big.js@npm:^5.2.2": + version: 5.2.2 + resolution: "big.js@npm:5.2.2" + checksum: 10c0/230520f1ff920b2d2ce3e372d77a33faa4fa60d802fe01ca4ffbc321ee06023fe9a741ac02793ee778040a16b7e497f7d60c504d1c402b8fdab6f03bb785a25f + languageName: node + linkType: hard + "bignumber.js@npm:^9.0.0": version: 9.3.0 resolution: "bignumber.js@npm:9.3.0" @@ -12722,6 +12748,13 @@ __metadata: languageName: node linkType: hard +"emojis-list@npm:^3.0.0": + version: 3.0.0 + resolution: "emojis-list@npm:3.0.0" + checksum: 10c0/7dc4394b7b910444910ad64b812392159a21e1a7ecc637c775a440227dcb4f80eff7fe61f4453a7d7603fa23d23d30cc93fe9e4b5ed985b88d6441cd4a35117b + languageName: node + linkType: hard + "enabled@npm:2.0.x": version: 2.0.0 resolution: "enabled@npm:2.0.0" @@ -14572,6 +14605,13 @@ __metadata: languageName: node linkType: hard +"glob-to-regexp@npm:^0.4.1": + version: 0.4.1 + resolution: "glob-to-regexp@npm:0.4.1" + checksum: 10c0/0486925072d7a916f052842772b61c3e86247f0a80cc0deb9b5a3e8a1a9faad5b04fb6f58986a09f34d3e96cd2a22a24b7e9882fb1cf904c31e9a310de96c429 + languageName: node + linkType: hard + "glob@npm:^10.5.0": version: 10.5.0 resolution: "glob@npm:10.5.0" @@ -16031,7 +16071,7 @@ __metadata: languageName: node linkType: hard -"json5@npm:^2.2.1, json5@npm:^2.2.2, json5@npm:^2.2.3": +"json5@npm:^2.1.2, json5@npm:^2.2.1, json5@npm:^2.2.2, json5@npm:^2.2.3": version: 2.2.3 resolution: "json5@npm:2.2.3" bin: @@ -16280,6 +16320,17 @@ __metadata: languageName: node linkType: hard +"loader-utils@npm:^2.0.0": + version: 2.0.4 + resolution: "loader-utils@npm:2.0.4" + dependencies: + big.js: "npm:^5.2.2" + emojis-list: "npm:^3.0.0" + json5: "npm:^2.1.2" + checksum: 10c0/d5654a77f9d339ec2a03d88221a5a695f337bf71eb8dea031b3223420bb818964ba8ed0069145c19b095f6c8b8fd386e602a3fc7ca987042bd8bb1dcc90d7100 + languageName: node + linkType: hard + "locate-path@npm:^6.0.0": version: 6.0.0 resolution: "locate-path@npm:6.0.0" @@ -19124,6 +19175,18 @@ __metadata: languageName: node linkType: hard +"raw-loader@npm:^4.0.2": + version: 4.0.2 + resolution: "raw-loader@npm:4.0.2" + dependencies: + loader-utils: "npm:^2.0.0" + schema-utils: "npm:^3.0.0" + peerDependencies: + webpack: ^4.0.0 || ^5.0.0 + checksum: 10c0/981ebe65e1cee7230300d21ba6dcd8bd23ea81ef4ad2b167c0f62d93deba347f27921d330be848634baab3831cf9f38900af6082d6416c2e937fe612fa6a74ff + languageName: node + linkType: hard + "react-device-detect@npm:^2.2.3": version: 2.2.3 resolution: "react-device-detect@npm:2.2.3" @@ -20032,6 +20095,17 @@ __metadata: languageName: node linkType: hard +"schema-utils@npm:^3.0.0": + version: 3.3.0 + resolution: "schema-utils@npm:3.3.0" + dependencies: + "@types/json-schema": "npm:^7.0.8" + ajv: "npm:^6.12.5" + ajv-keywords: "npm:^3.5.2" + checksum: 10c0/fafdbde91ad8aa1316bc543d4b61e65ea86970aebbfb750bfb6d8a6c287a23e415e0e926c2498696b242f63af1aab8e585252637fabe811fd37b604351da6500 + languageName: node + linkType: hard + "scroll-into-view-if-needed@npm:^3.1.0": version: 3.1.0 resolution: "scroll-into-view-if-needed@npm:3.1.0"