Add sub-agent orchestration, MCP tools, and critical bug fixes

Major Features:
- Sub-agent orchestration system with dynamic specialist spawning
  * spawn_sub_agent(): Create specialists with custom prompts
  * delegate(): Convenience method for task delegation
  * Cached specialists for reuse
  * Separate conversation histories and focused context
- MCP (Model Context Protocol) tool integration
  * Zettelkasten: fleeting_note, daily_note, permanent_note, literature_note
  * Search: search_vault (hybrid search), search_by_tags
  * Web: web_fetch for real-time data
  * Zero-cost file/system operations on Pro subscription

Critical Bug Fixes:
- Fixed max tool iterations (15 → 30, configurable)
- Fixed max_tokens error in Agent SDK query() call
- Fixed MCP tool routing in execute_tool()
  * Routes zettelkasten + web tools to async handlers
  * Prevents "Unknown tool" errors

Documentation:
- SUB_AGENTS.md: Complete guide to sub-agent system
- MCP_MIGRATION.md: Agent SDK migration details
- SOUL.example.md: Sanitized bot identity template
- scheduled_tasks.example.yaml: Sanitized task config template

Security:
- Added obsidian vault to .gitignore
- Protected SOUL.md and MEMORY.md (personal configs)
- Sanitized example configs with placeholders

Dependencies:
- Added beautifulsoup4, httpx, lxml for web scraping
- Updated requirements.txt

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-16 07:43:31 -07:00
parent 911d362ba2
commit 50cf7165cb
11 changed files with 1987 additions and 103 deletions
--- a/llm_interface.py
+++ b/llm_interface.py
@@ -1,9 +1,21 @@
 """LLM Interface - Claude API, GLM, and other models.

 Supports three modes for Claude:
-1. Agent SDK (uses Pro subscription) - DEFAULT - Set USE_AGENT_SDK=true (default)
+1. Agent SDK (v0.1.36+) - DEFAULT - Uses query() API with Pro subscription
+   - Set USE_AGENT_SDK=true (default)
+   - Model: claude-sonnet-4-5-20250929 (default for all operations)
+   - Optional: USE_OPUS_FOR_TOOLS=true (switches tool calls to Opus; intended for extremely intensive tasks only)
+   - MCP Tools: File/system (read_file, write_file, edit_file, list_directory, run_command), web_fetch, and Zettelkasten note/search tools
+   - Traditional Tools: Google tools & weather (fall back to Direct API, requires ANTHROPIC_API_KEY)
+   - Flat-rate subscription cost (no per-token charges for MCP tools)
+
 2. Direct API (pay-per-token) - Set USE_DIRECT_API=true
+   - Model: claude-sonnet-4-5-20250929 (cost-effective, never uses Opus)
+   - Requires ANTHROPIC_API_KEY in .env
+   - Full tool support built-in (all tools via traditional API)
+
 3. Legacy: Local Claude Code server - Set USE_CLAUDE_CODE_SERVER=true (deprecated)
+   - For backward compatibility only
 """

 import os
@@ -17,7 +29,13 @@ from usage_tracker import UsageTracker

 # Try to import Agent SDK (optional dependency)
 try:
-    from claude_agent_sdk import AgentSDK
+    from claude_agent_sdk import (
+        query,
+        UserMessage,
+        AssistantMessage,
+        SystemMessage,
+        ClaudeAgentOptions,
+    )
    import anyio
    AGENT_SDK_AVAILABLE = True
 except ImportError:
@@ -38,11 +56,15 @@ _USE_AGENT_SDK = os.getenv("USE_AGENT_SDK", "true").lower() == "true"

 # Default models by provider
 _DEFAULT_MODELS = {
-    "claude": "claude-haiku-4-5-20251001",  # For Direct API (pay-per-token)
-    "claude_agent_sdk": "claude-sonnet-4-5-20250929",  # For Agent SDK (flat-rate subscription)
+    "claude": "claude-sonnet-4-5-20250929",  # For Direct API (pay-per-token) - Sonnet is cost-effective
+    "claude_agent_sdk": "claude-sonnet-4-5-20250929",  # For Agent SDK (flat-rate) - Sonnet for normal operations
+    "claude_agent_sdk_opus": "claude-opus-4-6",  # For Agent SDK extremely intensive tasks only (flat-rate)
    "glm": "glm-4-plus",
 }

+# Opt-in switch: use Opus for tool calls (only honored in Agent SDK flat-rate mode)
+_USE_OPUS_FOR_TOOLS = os.getenv("USE_OPUS_FOR_TOOLS", "false").lower() == "true"
+
 _GLM_BASE_URL = "https://open.bigmodel.cn/api/paas/v4/chat/completions"


@@ -60,7 +82,6 @@ class LLMInterface:
            _API_KEY_ENV_VARS.get(provider, ""),
        )
        self.client: Optional[Anthropic] = None
-        self.agent_sdk: Optional[Any] = None
        # Model will be set after determining mode

        # Determine mode (priority: direct API > legacy server > agent SDK)
@@ -96,7 +117,7 @@ class LLMInterface:
        if provider == "claude":
            if self.mode == "agent_sdk":
                print(f"[LLM] Using Claude Agent SDK (flat-rate subscription) with model: {self.model}")
-                self.agent_sdk = AgentSDK()
+                # No initialization needed - query() is a standalone function
            elif self.mode == "direct_api":
                print(f"[LLM] Using Direct API (pay-per-token) with model: {self.model}")
                self.client = Anthropic(api_key=self.api_key)
@@ -115,7 +136,7 @@ class LLMInterface:
        self,
        messages: List[Dict],
        system: Optional[str] = None,
-        max_tokens: int = 4096,
+        max_tokens: int = 16384,
    ) -> str:
        """Send chat request and get response.

@@ -126,8 +147,8 @@ class LLMInterface:
            # Agent SDK mode (Pro subscription)
            if self.mode == "agent_sdk":
                try:
-                    # Use anyio to bridge async SDK to sync interface
-                    response = anyio.from_thread.run(
+                    # Use anyio.run to create event loop for async SDK
+                    response = anyio.run(
                        self._agent_sdk_chat,
                        messages,
                        system,
@@ -208,15 +229,65 @@ class LLMInterface:
        max_tokens: int
    ) -> str:
        """Internal async method for Agent SDK chat (called via anyio bridge)."""
-        response = await self.agent_sdk.chat(
-            messages=messages,
-            system=system,
-            max_tokens=max_tokens,
-            model=self.model
+        # Convert messages to SDK format
+        sdk_messages = []
+        for msg in messages:
+            if msg["role"] == "user":
+                sdk_messages.append(UserMessage(content=msg["content"]))
+            elif msg["role"] == "assistant":
+                sdk_messages.append(AssistantMessage(content=msg["content"]))
+
+        # Add system message if provided
+        if system:
+            sdk_messages.insert(0, SystemMessage(content=system))
+
+        # Configure MCP server for file/system tools
+        try:
+            from mcp_tools import file_system_server
+
+            options = ClaudeAgentOptions(
+                mcp_servers={"file_system": file_system_server},
+                # Allow all MCP tools (file/system + web + zettelkasten)
+                allowed_tools=[
+                    "read_file",
+                    "write_file",
+                    "edit_file",
+                    "list_directory",
+                    "run_command",
+                    "web_fetch",
+                    "fleeting_note",
+                    "daily_note",
+                    "literature_note",
+                    "permanent_note",
+                    "search_vault",
+                    "search_by_tags",
+                ],
+            )
+        except ImportError:
+            # Fallback if mcp_tools not available
+            options = None
+
+        # Call the new query() API
+        # Note: Agent SDK handles max_tokens internally, don't pass it explicitly
+        response = await query(
+            messages=sdk_messages,
+            options=options,
+            # model parameter is handled by the SDK based on settings
        )
+
        # Extract text from response
-        if isinstance(response, dict):
-            return response.get("content", "")
+        if hasattr(response, "content"):
+            # Handle list of content blocks
+            if isinstance(response.content, list):
+                text_parts = []
+                for block in response.content:
+                    if hasattr(block, "text"):
+                        text_parts.append(block.text)
+                return "".join(text_parts)
+            # Handle single text content
+            elif isinstance(response.content, str):
+                return response.content
+
        return str(response)

    async def _agent_sdk_chat_with_tools(
@@ -226,17 +297,43 @@ class LLMInterface:
        system: Optional[str],
        max_tokens: int
    ) -> Message:
-        """Internal async method for Agent SDK chat with tools (called via anyio bridge)."""
-        response = await self.agent_sdk.chat(
+        """Internal async method for Agent SDK chat with tools (called via anyio bridge).
+
+        NOTE: The new Claude Agent SDK (v0.1.36+) uses MCP servers for tools.
+        For backward compatibility with the existing tool system, we fall back
+        to the Direct API for tool calls. This means tool calls will consume API tokens
+        even when Agent SDK mode is enabled.
+
+        Uses Sonnet by default. Setting USE_OPUS_FOR_TOOLS=true switches these calls to Opus
+        (honored only in Agent SDK mode); they still go through the Direct API and use API tokens.
+        """
+        # Fallback to Direct API for tool calls (SDK tools use MCP servers)
+        from anthropic import Anthropic
+
+        if not self.api_key:
+            raise ValueError(
+                "ANTHROPIC_API_KEY required for tool calls in Agent SDK mode. "
+                "Set the API key in .env or migrate tools to MCP servers."
+            )
+
+        temp_client = Anthropic(api_key=self.api_key)
+
+        # Use Opus only if explicitly enabled (for intensive tasks on flat-rate)
+        # Otherwise default to Sonnet (cost-effective for normal tool operations)
+        if _USE_OPUS_FOR_TOOLS and self.mode == "agent_sdk":
+            model = _DEFAULT_MODELS.get("claude_agent_sdk_opus", "claude-opus-4-6")
+        else:
+            model = self.model  # Use Sonnet (default)
+
+        response = temp_client.messages.create(
+            model=model,
+            max_tokens=max_tokens,
+            system=system or "",
            messages=messages,
            tools=tools,
-            system=system,
-            max_tokens=max_tokens,
-            model=self.model
        )

-        # Convert Agent SDK response to anthropic.types.Message format
-        return self._convert_sdk_response_to_message(response)
+        return response

    def _convert_sdk_response_to_message(self, sdk_response: Dict[str, Any]) -> Message:
        """Convert Agent SDK response to anthropic.types.Message format.
@@ -302,7 +399,7 @@ class LLMInterface:
        messages: List[Dict],
        tools: List[Dict[str, Any]],
        system: Optional[str] = None,
-        max_tokens: int = 4096,
+        max_tokens: int = 16384,
        use_cache: bool = False,
    ) -> Message:
        """Send chat request with tool support. Returns full Message object.
@@ -316,8 +413,8 @@ class LLMInterface:
        # Agent SDK mode (Pro subscription)
        if self.mode == "agent_sdk":
            try:
-                # Use anyio to bridge async SDK to sync interface
-                response = anyio.from_thread.run(
+                # Use anyio.run to create event loop for async SDK
+                response = anyio.run(
                    self._agent_sdk_chat_with_tools,
                    messages,
                    tools,