feat: lazy tool loading — 31→11 tools, saves ~6K tokens per message

GregsGreyCode · claude · GregsGreyCode · commit bcf1220fa0ff · 2026-03-29T01:02:45.000Z
On models with ≤32K context, the agent now starts with 11 core tools
(~3.5K tokens) instead of all 31 tools (~9.5K tokens). Extended tools
are loaded on demand via the new request_tools() meta-tool.

Core tools (always loaded):
  terminal, read_file, write_file, patch, search_files, memory,
  todo, clarify, request_tools, request_mcp_access, get_mcp_catalogue

Extended tools (loaded via request_tools("category")):
  web, browser, image, vision, tts, delegation (incl. execute_code),
  workflows, cron, messaging, logs, skills, process, bugs, session

Token impact for a "Hi!" on a 16K model:
  Before: system(8K) + tools(9.5K) + msg(1) = 17.5K (EXCEEDS 16K)
  After:  system(8K) + tools(3.5K) + msg(1) = 11.5K (fits with 4.5K spare)

Implementation:
- tools/request_tools_tool.py (NEW): request_tools meta-tool with
  per-session tool grants. Same pattern as request_mcp_access.
- tools/registry.py: get_definitions now checks requires_env —
  tools with missing API keys are excluded from schemas (Option 1)
- core/model_tools.py: get_tool_definitions accepts lazy=True and
  session_id params. In lazy mode, intersects enabled toolset with
  CORE_TOOLS + session grants.
- agents/hermes/agent.py: auto-enables lazy mode when context ≤32K.
  After request_tools() call, rebuilds tool list to include newly
  granted tools for the next API call.

Cloud models (128K+) are unaffected — they get all 31 tools as before.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/agents/hermes/agent.py b/agents/hermes/agent.py
@@ -614,12 +614,23 @@ def __init__(
             if fb_p and fb_m and not self.quiet_mode:
                 print(f"🔄 Fallback model: {fb_m} ({fb_p})")
 
-        # Get available tools with filtering
+        # Get available tools with filtering.
+        # Lazy mode: when context is tight (≤32K), start with core tools only.
+        # The agent can call request_tools() to load more when needed.
+        _ctx_for_lazy = self.context_compressor.context_length if hasattr(self, 'context_compressor') else 128000
+        _use_lazy = _ctx_for_lazy <= 32768
+        self._lazy_tools = _use_lazy
+        self._enabled_toolsets = enabled_toolsets
+        self._disabled_toolsets = disabled_toolsets
         self.tools = get_tool_definitions(
             enabled_toolsets=enabled_toolsets,
             disabled_toolsets=disabled_toolsets,
             quiet_mode=self.quiet_mode,
+            lazy=_use_lazy,
+            session_id=self.session_id,
         )
+        if _use_lazy and not self.quiet_mode:
+            print(f"💡 Lazy tool loading (context {_ctx_for_lazy:,} ≤ 32K): {len(self.tools)} core tools loaded. Use request_tools() for more.")
         
         # Show tool configuration and store valid tool names for validation
         self.valid_tool_names = set()
@@ -1641,6 +1652,8 @@ def _activate_honcho(
             enabled_toolsets=enabled_toolsets,
             disabled_toolsets=disabled_toolsets,
             quiet_mode=True,
+            lazy=self._lazy_tools,
+            session_id=self.session_id,
         )
         self.valid_tool_names = {
             tool["function"]["name"] for tool in self.tools
@@ -3629,7 +3642,7 @@ def _invoke_tool(self, function_name: str, function_args: dict, effective_task_i
             )
         else:
             from tools.registry import registry as _registry
-            return _registry.dispatch(
+            _result = _registry.dispatch(
                 function_name, function_args,
                 policy=self._action_policy,
                 session_id=self.session_id,
@@ -3638,6 +3651,22 @@ def _invoke_tool(self, function_name: str, function_args: dict, effective_task_i
                 task_id=effective_task_id,
                 enabled_tools=list(self.valid_tool_names) if self.valid_tool_names else None,
             )
+            # Lazy tool loading: if request_tools was called, refresh the tool
+            # list so newly granted tools appear in the next API call.
+            if function_name == "request_tools" and self._lazy_tools and self.session_id:
+                try:
+                    self.tools = get_tool_definitions(
+                        enabled_toolsets=self._enabled_toolsets,
+                        disabled_toolsets=self._disabled_toolsets,
+                        quiet_mode=True,
+                        lazy=True,
+                        session_id=self.session_id,
+                    )
+                    self.valid_tool_names = {t["function"]["name"] for t in self.tools}
+                    logger.info("request_tools: refreshed tool list → %d tools", len(self.tools))
+                except Exception:
+                    pass
+            return _result
 
     def _execute_tool_calls_concurrent(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
         """Execute multiple tool calls concurrently using a thread pool.
diff --git a/core/model_tools.py b/core/model_tools.py
@@ -102,6 +102,8 @@ def _discover_tools():
         # Gateway MCP access tool — request_mcp_access + get_mcp_catalogue
         # Always registered so agents can request MCP access regardless of mode.
         "tools.mcp_access_tool",
+        # Lazy tool loading — request_tools meta-tool for on-demand tool injection.
+        "tools.request_tools_tool",
     ]
     import importlib
     for mod_name in _modules:
@@ -178,6 +180,8 @@ def get_tool_definitions(
     enabled_toolsets: List[str] = None,
     disabled_toolsets: List[str] = None,
     quiet_mode: bool = False,
+    lazy: bool = False,
+    session_id: str = None,
 ) -> List[Dict[str, Any]]:
     """
     Get tool definitions for model API calls with toolset-based filtering.
@@ -188,6 +192,9 @@ def get_tool_definitions(
         enabled_toolsets: Only include tools from these toolsets.
         disabled_toolsets: Exclude tools from these toolsets (if enabled_toolsets is None).
         quiet_mode: Suppress status prints.
+        lazy: If True, only return core tools + request_tools + session-granted tools.
+              Extended tools are loaded on demand via request_tools().
+        session_id: Session ID for looking up dynamically granted tools.
 
     Returns:
         Filtered list of OpenAI-format tool definitions.
@@ -235,7 +242,26 @@ def get_tool_definitions(
         for ts_name in get_all_toolsets():
             tools_to_include.update(resolve_toolset(ts_name))
 
-    # Ask the registry for schemas (only returns tools whose check_fn passes)
+    # Lazy mode: restrict to core tools + request_tools + session-granted tools.
+    # Core tools must still be in the enabled set; only the meta-tools are force-included.
+    if lazy and tools_to_include:
+        from tools.request_tools_tool import CORE_TOOLS, get_granted_tools
+        _core = set(CORE_TOOLS)
+        _core.add("request_tools")       # always include the meta-tool
+        _core.add("request_mcp_access")  # always include MCP access
+        _core.add("get_mcp_catalogue")   # always include MCP catalogue
+        if session_id:
+            _core |= set(get_granted_tools(session_id))
+        # Keep only core + granted tools from the enabled set, plus force-include
+        # the meta-tools even if they're not in the platform toolset.
+        _allowed = (tools_to_include & _core) | {"request_tools", "request_mcp_access", "get_mcp_catalogue"}
+        _full_count = len(tools_to_include)
+        tools_to_include = _allowed
+        if not quiet_mode:
+            logger.info("Lazy tool loading: %d → %d tools (extended available via request_tools)", _full_count, len(tools_to_include))
+
+    # Ask the registry for schemas (only returns tools whose check_fn passes
+    # and whose requires_env keys are present)
     filtered_tools = registry.get_definitions(tools_to_include, quiet=quiet_mode)
 
     # Rebuild execute_code schema to only list sandbox tools that are actually
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "logos"
-version = "0.6.2"
+version = "0.6.3"
 description = "A self-hosted agent platform — inference routing, multi-model benchmarking, and policy-governed agent runs across local and cloud hardware"
 readme = "README.md"
 requires-python = ">=3.11"
diff --git a/tools/registry.py b/tools/registry.py
@@ -85,13 +85,22 @@ def get_definitions(self, tool_names: Set[str], quiet: bool = False) -> List[dic
         """Return OpenAI-format tool schemas for the requested tool names.
 
         Only tools whose ``check_fn()`` returns True (or have no check_fn)
+        AND whose ``requires_env`` keys are all present in os.environ
         are included.
         """
+        import os as _os
         result = []
         for name in sorted(tool_names):
             entry = self._tools.get(name)
             if not entry:
                 continue
+            # Skip tools with unmet environment requirements
+            if entry.requires_env:
+                missing = [k for k in entry.requires_env if not _os.environ.get(k)]
+                if missing:
+                    if not quiet:
+                        logger.debug("Tool %s unavailable (missing env: %s)", name, missing)
+                    continue
             if entry.check_fn:
                 try:
                     if not entry.check_fn():
diff --git a/tools/request_tools_tool.py b/tools/request_tools_tool.py
@@ -0,0 +1,183 @@
+"""
+request_tools — lazy tool loading meta-tool.
+
+Agents start with a small set of core tools to keep token usage low.
+When they need additional capabilities (web search, browser, image gen,
+etc.), they call request_tools(categories) to inject those tool schemas
+into the next API call.
+
+This follows the same pattern as request_mcp_access — the agent asks
+for what it needs, the gateway provides it.
+
+Core tools (~2-3K tokens, always loaded):
+  terminal, read_file, write_file, patch, search_files, memory,
+  todo, clarify, request_tools, request_mcp_access, get_mcp_catalogue
+
+Extended categories (~6-7K tokens, loaded on demand):
+  web, browser, image, vision, tts, delegation, workflows,
+  cron, messaging, logs, skills, process, bugs, session
+
+The split point is: can the agent have a useful conversation with just
+the core tools? Yes — it can read/write files, run commands, search
+code, and remember things. The extended tools are for specific tasks.
+"""
+
+import json
+import logging
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Tool categories → tool names (category the agent requests → tools it unlocks)
+# ---------------------------------------------------------------------------
+
+TOOL_CATEGORIES = {
+    "web":        {"tools": ["web_search", "web_extract"], "description": "Web search and content extraction (Firecrawl)"},
+    "browser":    {"tools": ["browser_navigate", "browser_click", "browser_type", "browser_snapshot", "browser_scroll", "browser_press", "browser_back", "browser_close", "browser_get_images", "browser_vision", "browser_console"], "description": "Browser automation"},
+    "image":      {"tools": ["image_generate"], "description": "Image generation (fal.ai)"},
+    "vision":     {"tools": ["vision_analyze"], "description": "Image analysis using AI vision"},
+    "tts":        {"tools": ["text_to_speech"], "description": "Text-to-speech audio generation"},
+    "delegation": {"tools": ["delegate_task", "execute_code", "mixture_of_agents"], "description": "Subagent spawning and programmatic tool calling"},
+    "workflows":  {"tools": ["workflow"], "description": "Multi-step DAG task workflows"},
+    "cron":       {"tools": ["schedule_cronjob", "list_cronjobs", "remove_cronjob"], "description": "Scheduled task management"},
+    "messaging":  {"tools": ["send_message"], "description": "Cross-platform message delivery"},
+    "logs":       {"tools": ["log_inspector"], "description": "Runtime log analysis"},
+    "skills":     {"tools": ["skill_manage", "skill_view", "skills_list"], "description": "Skill management and browsing"},
+    "process":    {"tools": ["process"], "description": "Background process management"},
+    "bugs":       {"tools": ["bug_notes"], "description": "Self-reported bug tracking"},
+    "session":    {"tools": ["session_search"], "description": "Long-term conversation memory search"},
+}
+
+# Core tools — in lazy mode these are kept from the enabled tool set without
+# needing a request_tools() grant (see get_tool_definitions in core/model_tools.py).
+CORE_TOOLS = frozenset({
+    "terminal",
+    "read_file",
+    "write_file",
+    "patch",
+    "search_files",
+    "memory",
+    "todo",
+    "clarify",
+})
+
+# ---------------------------------------------------------------------------
+# Session-level tool grants (same pattern as mcp_access)
+# ---------------------------------------------------------------------------
+# Grants are held in this module's memory only, keyed by session_id.
+# NOTE(review): nothing visible in this change calls clear_session(); confirm
+# a caller elsewhere releases entries when a session ends.
+
+import threading
+
+# Guards every read and write of _session_tools below.
+_lock = threading.Lock()
+_session_tools: dict[str, set[str]] = {}  # session_id → set of granted tool names
+
+
+def grant_tools(session_id: str, tool_names: list[str]) -> None:
+    """Grant additional tools to a session."""
+    with _lock:
+        if session_id not in _session_tools:
+            _session_tools[session_id] = set()
+        _session_tools[session_id].update(tool_names)
+
+
+def get_granted_tools(session_id: str) -> frozenset[str]:
+    """Return the set of tools granted to this session beyond core."""
+    with _lock:
+        return frozenset(_session_tools.get(session_id, set()))
+
+
+def clear_session(session_id: str) -> None:
+    """Clean up when session ends."""
+    with _lock:
+        _session_tools.pop(session_id, None)
+
+
+# ---------------------------------------------------------------------------
+# Handler
+# ---------------------------------------------------------------------------
+
+_TOOL_NAME = "request_tools"
+
+
+def _handler(args: dict, **kwargs) -> str:
+    categories = args.get("categories") or []
+    session_id = kwargs.get("session_id")
+
+    if not categories:
+        # List available categories
+        cat_list = []
+        for cat, info in TOOL_CATEGORIES.items():
+            cat_list.append(f"  {cat}: {info['description']} ({len(info['tools'])} tools)")
+        return json.dumps({
+            "available_categories": list(TOOL_CATEGORIES.keys()),
+            "details": "\n".join(cat_list),
+            "message": "Call request_tools with the categories you need.",
+        })
+
+    granted = []
+    not_found = []
+    for cat in categories:
+        cat = cat.strip().lower()
+        if cat in TOOL_CATEGORIES:
+            tools = TOOL_CATEGORIES[cat]["tools"]
+            if session_id:
+                grant_tools(session_id, tools)
+            granted.extend(tools)
+            logger.info("request_tools: granted %s tools to session %s: %s", cat, session_id, tools)
+        else:
+            not_found.append(cat)
+
+    result = {
+        "status": "granted",
+        "tools_added": granted,
+        "message": f"Added {len(granted)} tools. They will be available from your next message.",
+    }
+    if not_found:
+        result["not_found"] = not_found
+        result["available_categories"] = list(TOOL_CATEGORIES.keys())
+
+    return json.dumps(result)
+
+
+# ---------------------------------------------------------------------------
+# Self-registration
+# ---------------------------------------------------------------------------
+
+def _register():
+    try:
+        from tools.registry import registry
+
+        cat_names = ", ".join(TOOL_CATEGORIES.keys())
+        schema = {
+            "name": _TOOL_NAME,
+            "description": (
+                "Request additional tool capabilities beyond the core set. "
+                f"Available categories: {cat_names}. "
+                "Call with no arguments to see descriptions. "
+                "Tools are added to your session and available from the next message."
+            ),
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "categories": {
+                        "type": "array",
+                        "items": {"type": "string"},
+                        "description": f"Tool categories to load. Available: {cat_names}",
+                    },
+                },
+                "required": [],
+            },
+        }
+
+        registry.register(
+            name=_TOOL_NAME,
+            toolset="core",
+            schema=schema,
+            handler=_handler,
+            is_async=False,
+            description=schema["description"],
+        )
+        logger.debug("request_tools: registered")
+    except Exception as exc:
+        logger.debug("request_tools: registration failed: %s", exc)
+
+
+_register()