Add sub-agent orchestration, MCP tools, and critical bug fixes
Major Features:
- Sub-agent orchestration system with dynamic specialist spawning
  * spawn_sub_agent(): Create specialists with custom prompts
  * delegate(): Convenience method for task delegation
  * Cached specialists for reuse
  * Separate conversation histories and focused context
- MCP (Model Context Protocol) tool integration
  * Zettelkasten: fleeting_note, daily_note, permanent_note, literature_note
  * Search: search_vault (hybrid search), search_by_tags
  * Web: web_fetch for real-time data
  * Zero-cost file/system operations on Pro subscription

Critical Bug Fixes:
- Fixed max tool iterations (15 → 30, configurable)
- Fixed max_tokens error in Agent SDK query() call
- Fixed MCP tool routing in execute_tool()
  * Routes zettelkasten + web tools to async handlers
  * Prevents "Unknown tool" errors

Documentation:
- SUB_AGENTS.md: Complete guide to sub-agent system
- MCP_MIGRATION.md: Agent SDK migration details
- SOUL.example.md: Sanitized bot identity template
- scheduled_tasks.example.yaml: Sanitized task config template

Security:
- Added Obsidian vault to .gitignore
- Protected SOUL.md and MEMORY.md (personal configs)
- Sanitized example configs with placeholders

Dependencies:
- Added beautifulsoup4, httpx, lxml for web scraping
- Updated requirements.txt

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
149
llm_interface.py
149
llm_interface.py
@@ -1,9 +1,21 @@
|
||||
"""LLM Interface - Claude API, GLM, and other models.
|
||||
|
||||
Supports three modes for Claude:
|
||||
1. Agent SDK (uses Pro subscription) - DEFAULT - Set USE_AGENT_SDK=true (default)
|
||||
1. Agent SDK (v0.1.36+) - DEFAULT - Uses query() API with Pro subscription
|
||||
- Set USE_AGENT_SDK=true (default)
|
||||
- Model: claude-sonnet-4-5-20250929 (default for all operations)
|
||||
- Optional: USE_OPUS_FOR_TOOLS=true (enables Opus for extremely intensive tasks only)
|
||||
- MCP Tools: File/system tools (read_file, write_file, edit_file, list_directory, run_command)
|
||||
- Traditional Tools: Google tools & weather (fall back to the Direct API; require ANTHROPIC_API_KEY)
|
||||
- Flat-rate subscription cost (no per-token charges for MCP tools)
|
||||
|
||||
2. Direct API (pay-per-token) - Set USE_DIRECT_API=true
|
||||
- Model: claude-sonnet-4-5-20250929 (cost-effective, never uses Opus)
|
||||
- Requires ANTHROPIC_API_KEY in .env
|
||||
- Full tool support built-in (all tools via traditional API)
|
||||
|
||||
3. Legacy: Local Claude Code server - Set USE_CLAUDE_CODE_SERVER=true (deprecated)
|
||||
- For backward compatibility only
|
||||
"""
|
||||
|
||||
import os
|
||||
@@ -17,7 +29,13 @@ from usage_tracker import UsageTracker
|
||||
|
||||
# Try to import Agent SDK (optional dependency)
|
||||
try:
|
||||
from claude_agent_sdk import AgentSDK
|
||||
from claude_agent_sdk import (
|
||||
query,
|
||||
UserMessage,
|
||||
AssistantMessage,
|
||||
SystemMessage,
|
||||
ClaudeAgentOptions,
|
||||
)
|
||||
import anyio
|
||||
AGENT_SDK_AVAILABLE = True
|
||||
except ImportError:
|
||||
@@ -38,11 +56,15 @@ _USE_AGENT_SDK = os.getenv("USE_AGENT_SDK", "true").lower() == "true"
|
||||
|
||||
# Default models by provider
|
||||
_DEFAULT_MODELS = {
|
||||
"claude": "claude-haiku-4-5-20251001", # For Direct API (pay-per-token)
|
||||
"claude_agent_sdk": "claude-sonnet-4-5-20250929", # For Agent SDK (flat-rate subscription)
|
||||
"claude": "claude-sonnet-4-5-20250929", # For Direct API (pay-per-token) - Sonnet is cost-effective
|
||||
"claude_agent_sdk": "claude-sonnet-4-5-20250929", # For Agent SDK (flat-rate) - Sonnet for normal operations
|
||||
"claude_agent_sdk_opus": "claude-opus-4-6", # For Agent SDK extremely intensive tasks only (flat-rate)
|
||||
"glm": "glm-4-plus",
|
||||
}
|
||||
|
||||
# When to use Opus (only on Agent SDK flat-rate mode)
|
||||
_USE_OPUS_FOR_TOOLS = os.getenv("USE_OPUS_FOR_TOOLS", "false").lower() == "true"
|
||||
|
||||
_GLM_BASE_URL = "https://open.bigmodel.cn/api/paas/v4/chat/completions"
|
||||
|
||||
|
||||
@@ -60,7 +82,6 @@ class LLMInterface:
|
||||
_API_KEY_ENV_VARS.get(provider, ""),
|
||||
)
|
||||
self.client: Optional[Anthropic] = None
|
||||
self.agent_sdk: Optional[Any] = None
|
||||
# Model will be set after determining mode
|
||||
|
||||
# Determine mode (priority: direct API > legacy server > agent SDK)
|
||||
@@ -96,7 +117,7 @@ class LLMInterface:
|
||||
if provider == "claude":
|
||||
if self.mode == "agent_sdk":
|
||||
print(f"[LLM] Using Claude Agent SDK (flat-rate subscription) with model: {self.model}")
|
||||
self.agent_sdk = AgentSDK()
|
||||
# No initialization needed - query() is a standalone function
|
||||
elif self.mode == "direct_api":
|
||||
print(f"[LLM] Using Direct API (pay-per-token) with model: {self.model}")
|
||||
self.client = Anthropic(api_key=self.api_key)
|
||||
@@ -115,7 +136,7 @@ class LLMInterface:
|
||||
self,
|
||||
messages: List[Dict],
|
||||
system: Optional[str] = None,
|
||||
max_tokens: int = 4096,
|
||||
max_tokens: int = 16384,
|
||||
) -> str:
|
||||
"""Send chat request and get response.
|
||||
|
||||
@@ -126,8 +147,8 @@ class LLMInterface:
|
||||
# Agent SDK mode (Pro subscription)
|
||||
if self.mode == "agent_sdk":
|
||||
try:
|
||||
# Use anyio to bridge async SDK to sync interface
|
||||
response = anyio.from_thread.run(
|
||||
# Use anyio.run to create event loop for async SDK
|
||||
response = anyio.run(
|
||||
self._agent_sdk_chat,
|
||||
messages,
|
||||
system,
|
||||
@@ -208,15 +229,65 @@ class LLMInterface:
|
||||
max_tokens: int
|
||||
) -> str:
|
||||
"""Internal async method for Agent SDK chat (called via anyio bridge)."""
|
||||
response = await self.agent_sdk.chat(
|
||||
messages=messages,
|
||||
system=system,
|
||||
max_tokens=max_tokens,
|
||||
model=self.model
|
||||
# Convert messages to SDK format
|
||||
sdk_messages = []
|
||||
for msg in messages:
|
||||
if msg["role"] == "user":
|
||||
sdk_messages.append(UserMessage(content=msg["content"]))
|
||||
elif msg["role"] == "assistant":
|
||||
sdk_messages.append(AssistantMessage(content=msg["content"]))
|
||||
|
||||
# Add system message if provided
|
||||
if system:
|
||||
sdk_messages.insert(0, SystemMessage(content=system))
|
||||
|
||||
# Configure MCP server for file/system tools
|
||||
try:
|
||||
from mcp_tools import file_system_server
|
||||
|
||||
options = ClaudeAgentOptions(
|
||||
mcp_servers={"file_system": file_system_server},
|
||||
# Allow all MCP tools (file/system + web + zettelkasten)
|
||||
allowed_tools=[
|
||||
"read_file",
|
||||
"write_file",
|
||||
"edit_file",
|
||||
"list_directory",
|
||||
"run_command",
|
||||
"web_fetch",
|
||||
"fleeting_note",
|
||||
"daily_note",
|
||||
"literature_note",
|
||||
"permanent_note",
|
||||
"search_vault",
|
||||
"search_by_tags",
|
||||
],
|
||||
)
|
||||
except ImportError:
|
||||
# Fallback if mcp_tools not available
|
||||
options = None
|
||||
|
||||
# Call the new query() API
|
||||
# Note: Agent SDK handles max_tokens internally, don't pass it explicitly
|
||||
response = await query(
|
||||
messages=sdk_messages,
|
||||
options=options,
|
||||
# model parameter is handled by the SDK based on settings
|
||||
)
|
||||
|
||||
# Extract text from response
|
||||
if isinstance(response, dict):
|
||||
return response.get("content", "")
|
||||
if hasattr(response, "content"):
|
||||
# Handle list of content blocks
|
||||
if isinstance(response.content, list):
|
||||
text_parts = []
|
||||
for block in response.content:
|
||||
if hasattr(block, "text"):
|
||||
text_parts.append(block.text)
|
||||
return "".join(text_parts)
|
||||
# Handle single text content
|
||||
elif isinstance(response.content, str):
|
||||
return response.content
|
||||
|
||||
return str(response)
|
||||
|
||||
async def _agent_sdk_chat_with_tools(
|
||||
@@ -226,17 +297,43 @@ class LLMInterface:
|
||||
system: Optional[str],
|
||||
max_tokens: int
|
||||
) -> Message:
|
||||
"""Internal async method for Agent SDK chat with tools (called via anyio bridge)."""
|
||||
response = await self.agent_sdk.chat(
|
||||
"""Internal async method for Agent SDK chat with tools (called via anyio bridge).
|
||||
|
||||
NOTE: The new Claude Agent SDK (v0.1.36+) uses MCP servers for tools.
|
||||
For backward compatibility with the existing tool system, we fall back
|
||||
to the Direct API for tool calls. This means tool calls will consume API tokens
|
||||
even when Agent SDK mode is enabled.
|
||||
|
||||
Uses Sonnet by default. Opus can be enabled via USE_OPUS_FOR_TOOLS=true for
|
||||
extremely intensive tasks (only recommended for Agent SDK flat-rate mode).
|
||||
"""
|
||||
# Fallback to Direct API for tool calls (SDK tools use MCP servers)
|
||||
from anthropic import Anthropic
|
||||
|
||||
if not self.api_key:
|
||||
raise ValueError(
|
||||
"ANTHROPIC_API_KEY required for tool calls in Agent SDK mode. "
|
||||
"Set the API key in .env or migrate tools to MCP servers."
|
||||
)
|
||||
|
||||
temp_client = Anthropic(api_key=self.api_key)
|
||||
|
||||
# Use Opus only if explicitly enabled (for intensive tasks on flat-rate)
|
||||
# Otherwise default to Sonnet (cost-effective for normal tool operations)
|
||||
if _USE_OPUS_FOR_TOOLS and self.mode == "agent_sdk":
|
||||
model = _DEFAULT_MODELS.get("claude_agent_sdk_opus", "claude-opus-4-6")
|
||||
else:
|
||||
model = self.model # Use Sonnet (default)
|
||||
|
||||
response = temp_client.messages.create(
|
||||
model=model,
|
||||
max_tokens=max_tokens,
|
||||
system=system or "",
|
||||
messages=messages,
|
||||
tools=tools,
|
||||
system=system,
|
||||
max_tokens=max_tokens,
|
||||
model=self.model
|
||||
)
|
||||
|
||||
# Convert Agent SDK response to anthropic.types.Message format
|
||||
return self._convert_sdk_response_to_message(response)
|
||||
return response
|
||||
|
||||
def _convert_sdk_response_to_message(self, sdk_response: Dict[str, Any]) -> Message:
|
||||
"""Convert Agent SDK response to anthropic.types.Message format.
|
||||
@@ -302,7 +399,7 @@ class LLMInterface:
|
||||
messages: List[Dict],
|
||||
tools: List[Dict[str, Any]],
|
||||
system: Optional[str] = None,
|
||||
max_tokens: int = 4096,
|
||||
max_tokens: int = 16384,
|
||||
use_cache: bool = False,
|
||||
) -> Message:
|
||||
"""Send chat request with tool support. Returns full Message object.
|
||||
@@ -316,8 +413,8 @@ class LLMInterface:
|
||||
# Agent SDK mode (Pro subscription)
|
||||
if self.mode == "agent_sdk":
|
||||
try:
|
||||
# Use anyio to bridge async SDK to sync interface
|
||||
response = anyio.from_thread.run(
|
||||
# Use anyio.run to create event loop for async SDK
|
||||
response = anyio.run(
|
||||
self._agent_sdk_chat_with_tools,
|
||||
messages,
|
||||
tools,
|
||||
|
||||
Reference in New Issue
Block a user