Add sub-agent orchestration, MCP tools, and critical bug fixes

Major Features:
- Sub-agent orchestration system with dynamic specialist spawning
  * spawn_sub_agent(): Create specialists with custom prompts
  * delegate(): Convenience method for task delegation
  * Cached specialists for reuse
  * Separate conversation histories and focused context

- MCP (Model Context Protocol) tool integration
  * Zettelkasten: fleeting_note, daily_note, permanent_note, literature_note
  * Search: search_vault (hybrid search), search_by_tags
  * Web: web_fetch for real-time data
  * Zero-cost file/system operations on Pro subscription

Critical Bug Fixes:
- Fixed max tool iterations (15 → 30, configurable)
- Fixed max_tokens error in Agent SDK query() call
- Fixed MCP tool routing in execute_tool()
  * Routes zettelkasten + web tools to async handlers
  * Prevents "Unknown tool" errors

Documentation:
- SUB_AGENTS.md: Complete guide to sub-agent system
- MCP_MIGRATION.md: Agent SDK migration details
- SOUL.example.md: Sanitized bot identity template
- scheduled_tasks.example.yaml: Sanitized task config template

Security:
- Added Obsidian vault to .gitignore
- Protected SOUL.md and MEMORY.md (personal configs)
- Sanitized example configs with placeholders

Dependencies:
- Added beautifulsoup4, httpx, lxml for web scraping
- Updated requirements.txt

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-16 07:43:31 -07:00
parent 911d362ba2
commit 50cf7165cb
11 changed files with 1987 additions and 103 deletions

View File

@@ -1,9 +1,21 @@
"""LLM Interface - Claude API, GLM, and other models.
Supports three modes for Claude:
1. Agent SDK (uses Pro subscription) - DEFAULT - Set USE_AGENT_SDK=true (default)
1. Agent SDK (v0.1.36+) - DEFAULT - Uses query() API with Pro subscription
- Set USE_AGENT_SDK=true (default)
- Model: claude-sonnet-4-5-20250929 (default for all operations)
- Optional: USE_OPUS_FOR_TOOLS=true (enables Opus for extremely intensive tasks only)
- MCP Tools: File/system tools (read_file, write_file, edit_file, list_directory, run_command)
- Traditional Tools: Google tools & weather (fall back to Direct API, requires ANTHROPIC_API_KEY)
- Flat-rate subscription cost (no per-token charges for MCP tools)
2. Direct API (pay-per-token) - Set USE_DIRECT_API=true
- Model: claude-sonnet-4-5-20250929 (cost-effective, never uses Opus)
- Requires ANTHROPIC_API_KEY in .env
- Full tool support built-in (all tools via traditional API)
3. Legacy: Local Claude Code server - Set USE_CLAUDE_CODE_SERVER=true (deprecated)
- For backward compatibility only
"""
import os
@@ -17,7 +29,13 @@ from usage_tracker import UsageTracker
# Try to import Agent SDK (optional dependency)
try:
from claude_agent_sdk import AgentSDK
from claude_agent_sdk import (
query,
UserMessage,
AssistantMessage,
SystemMessage,
ClaudeAgentOptions,
)
import anyio
AGENT_SDK_AVAILABLE = True
except ImportError:
@@ -38,11 +56,15 @@ _USE_AGENT_SDK = os.getenv("USE_AGENT_SDK", "true").lower() == "true"
# Default models by provider
_DEFAULT_MODELS = {
"claude": "claude-haiku-4-5-20251001", # For Direct API (pay-per-token)
"claude_agent_sdk": "claude-sonnet-4-5-20250929", # For Agent SDK (flat-rate subscription)
"claude": "claude-sonnet-4-5-20250929", # For Direct API (pay-per-token) - Sonnet is cost-effective
"claude_agent_sdk": "claude-sonnet-4-5-20250929", # For Agent SDK (flat-rate) - Sonnet for normal operations
"claude_agent_sdk_opus": "claude-opus-4-6", # For Agent SDK extremely intensive tasks only (flat-rate)
"glm": "glm-4-plus",
}
# When to use Opus (only on Agent SDK flat-rate mode)
_USE_OPUS_FOR_TOOLS = os.getenv("USE_OPUS_FOR_TOOLS", "false").lower() == "true"
_GLM_BASE_URL = "https://open.bigmodel.cn/api/paas/v4/chat/completions"
@@ -60,7 +82,6 @@ class LLMInterface:
_API_KEY_ENV_VARS.get(provider, ""),
)
self.client: Optional[Anthropic] = None
self.agent_sdk: Optional[Any] = None
# Model will be set after determining mode
# Determine mode (priority: direct API > legacy server > agent SDK)
@@ -96,7 +117,7 @@ class LLMInterface:
if provider == "claude":
if self.mode == "agent_sdk":
print(f"[LLM] Using Claude Agent SDK (flat-rate subscription) with model: {self.model}")
self.agent_sdk = AgentSDK()
# No initialization needed - query() is a standalone function
elif self.mode == "direct_api":
print(f"[LLM] Using Direct API (pay-per-token) with model: {self.model}")
self.client = Anthropic(api_key=self.api_key)
@@ -115,7 +136,7 @@ class LLMInterface:
self,
messages: List[Dict],
system: Optional[str] = None,
max_tokens: int = 4096,
max_tokens: int = 16384,
) -> str:
"""Send chat request and get response.
@@ -126,8 +147,8 @@ class LLMInterface:
# Agent SDK mode (Pro subscription)
if self.mode == "agent_sdk":
try:
# Use anyio to bridge async SDK to sync interface
response = anyio.from_thread.run(
# Use anyio.run to create event loop for async SDK
response = anyio.run(
self._agent_sdk_chat,
messages,
system,
@@ -208,15 +229,65 @@ class LLMInterface:
max_tokens: int
) -> str:
"""Internal async method for Agent SDK chat (called via anyio bridge)."""
response = await self.agent_sdk.chat(
messages=messages,
system=system,
max_tokens=max_tokens,
model=self.model
# Convert messages to SDK format
sdk_messages = []
for msg in messages:
if msg["role"] == "user":
sdk_messages.append(UserMessage(content=msg["content"]))
elif msg["role"] == "assistant":
sdk_messages.append(AssistantMessage(content=msg["content"]))
# Add system message if provided
if system:
sdk_messages.insert(0, SystemMessage(content=system))
# Configure MCP server for file/system tools
try:
from mcp_tools import file_system_server
options = ClaudeAgentOptions(
mcp_servers={"file_system": file_system_server},
# Allow all MCP tools (file/system + web + zettelkasten)
allowed_tools=[
"read_file",
"write_file",
"edit_file",
"list_directory",
"run_command",
"web_fetch",
"fleeting_note",
"daily_note",
"literature_note",
"permanent_note",
"search_vault",
"search_by_tags",
],
)
except ImportError:
# Fallback if mcp_tools not available
options = None
# Call the new query() API
# Note: Agent SDK handles max_tokens internally, don't pass it explicitly
response = await query(
messages=sdk_messages,
options=options,
# model parameter is handled by the SDK based on settings
)
# Extract text from response
if isinstance(response, dict):
return response.get("content", "")
if hasattr(response, "content"):
# Handle list of content blocks
if isinstance(response.content, list):
text_parts = []
for block in response.content:
if hasattr(block, "text"):
text_parts.append(block.text)
return "".join(text_parts)
# Handle single text content
elif isinstance(response.content, str):
return response.content
return str(response)
async def _agent_sdk_chat_with_tools(
@@ -226,17 +297,43 @@ class LLMInterface:
system: Optional[str],
max_tokens: int
) -> Message:
"""Internal async method for Agent SDK chat with tools (called via anyio bridge)."""
response = await self.agent_sdk.chat(
"""Internal async method for Agent SDK chat with tools (called via anyio bridge).
NOTE: The new Claude Agent SDK (v0.1.36+) uses MCP servers for tools.
For backward compatibility with the existing tool system, we fall back
to the Direct API for tool calls. This means tool calls will consume API tokens
even when Agent SDK mode is enabled.
Uses Sonnet by default. Opus can be enabled via USE_OPUS_FOR_TOOLS=true for
extremely intensive tasks (only recommended for Agent SDK flat-rate mode).
"""
# Fallback to Direct API for tool calls (SDK tools use MCP servers)
from anthropic import Anthropic
if not self.api_key:
raise ValueError(
"ANTHROPIC_API_KEY required for tool calls in Agent SDK mode. "
"Set the API key in .env or migrate tools to MCP servers."
)
temp_client = Anthropic(api_key=self.api_key)
# Use Opus only if explicitly enabled (for intensive tasks on flat-rate)
# Otherwise default to Sonnet (cost-effective for normal tool operations)
if _USE_OPUS_FOR_TOOLS and self.mode == "agent_sdk":
model = _DEFAULT_MODELS.get("claude_agent_sdk_opus", "claude-opus-4-6")
else:
model = self.model # Use Sonnet (default)
response = temp_client.messages.create(
model=model,
max_tokens=max_tokens,
system=system or "",
messages=messages,
tools=tools,
system=system,
max_tokens=max_tokens,
model=self.model
)
# Convert Agent SDK response to anthropic.types.Message format
return self._convert_sdk_response_to_message(response)
return response
def _convert_sdk_response_to_message(self, sdk_response: Dict[str, Any]) -> Message:
"""Convert Agent SDK response to anthropic.types.Message format.
@@ -302,7 +399,7 @@ class LLMInterface:
messages: List[Dict],
tools: List[Dict[str, Any]],
system: Optional[str] = None,
max_tokens: int = 4096,
max_tokens: int = 16384,
use_cache: bool = False,
) -> Message:
"""Send chat request with tool support. Returns full Message object.
@@ -316,8 +413,8 @@ class LLMInterface:
# Agent SDK mode (Pro subscription)
if self.mode == "agent_sdk":
try:
# Use anyio to bridge async SDK to sync interface
response = anyio.from_thread.run(
# Use anyio.run to create event loop for async SDK
response = anyio.run(
self._agent_sdk_chat_with_tools,
messages,
tools,