Add MCP delegation bridge and diagram tools

**Features Added**:

1. **Agent Registry (agent_registry.py)**
   - Thread-safe global singleton for MCP tool access to Agent instance
   - Enables MCP tools to call Agent.delegate() without circular imports
   - Registered at bot startup in bot_runner.py

2. **Sub-Agent Manager (sub_agent_manager.py)**
   - Watchdog system monitoring sub-agent lifecycle
   - Detects hung agents (5min timeout, 30s check interval)
   - Auto-cleanup and status tracking

3. **delegate_task MCP Tool (mcp_tools.py)**
   - Exposes Agent.delegate() to Claude via MCP protocol
   - Enables parallel sub-agent execution via tool calls
   - Supports specialist prompts and agent ID caching

4. **Memory Write Locks (memory_system.py)**
   - Thread-safe writes to prevent file corruption
   - Protects write_memory(), update_soul(), update_user()

5. **Diagram Tools**
   - Mermaid MCP server (flowcharts, sequence diagrams, etc.)
   - Excalidraw MCP server (hand-drawn style diagrams)
   - Config files in config/ directory

6. **Adapter Improvements**
   - Enhanced error handling across all adapters
   - Unified logging patterns

**Testing**: Ready for parallel sub-agent testing

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-03-01 14:34:24 -07:00
parent dd5beb11c2
commit e909cc0044
13 changed files with 1081 additions and 26 deletions

View File

@@ -10,6 +10,7 @@ import subprocess
from typing import Any, Dict, List, Optional
from urllib.parse import urlparse
from datetime import datetime
import threading
from claude_agent_sdk import tool, create_sdk_mcp_server
import httpx
from bs4 import BeautifulSoup
@@ -21,9 +22,16 @@ try:
except ImportError:
MEMORY_AVAILABLE = False
# Import agent registry for delegate_task tool
try:
from agent_registry import get_agent
AGENT_REGISTRY_AVAILABLE = True
except ImportError:
AGENT_REGISTRY_AVAILABLE = False
# Maximum characters of tool output to return (prevents token explosion)
_MAX_TOOL_OUTPUT = 5000
_MAX_TOOL_OUTPUT = 5000 # Restored for complex diagram generation
# Maximum page size for web fetching (500KB)
_MAX_WEB_PAGE_SIZE = 500_000
@@ -188,7 +196,7 @@ def _is_safe_url(url: str) -> bool:
},
)
async def read_file_tool(args: Dict[str, Any]) -> Dict[str, Any]:
"""Read and return file contents."""
"""Read and return file contents with auto-retry for PDFs."""
file_path = args["file_path"]
path = Path(file_path)
@@ -198,6 +206,136 @@ async def read_file_tool(args: Dict[str, Any]) -> Dict[str, Any]:
"isError": True
}
# Check if it's a PDF
is_pdf = path.suffix.lower() == ".pdf"
if is_pdf:
# Try reading PDF with multiple methods
for attempt, method in enumerate(["pypdf", "pdfplumber", "pdfminer"], 1):
try:
if method == "pypdf":
try:
from pypdf import PdfReader
except ImportError:
continue # Try next method
reader = PdfReader(path)
# Check if actually password-protected
if reader.is_encrypted:
# Try with empty password first (some PDFs are "encrypted" with no password)
try:
reader.decrypt("")
except Exception:
return {
"content": [{"type": "text", "text": f"PDF is password-protected and cannot be read without the password."}],
"isError": True
}
# Extract text from all pages with early truncation
text_parts = []
total_length = 0
truncated = False
for i, page in enumerate(reader.pages, 1):
page_text = page.extract_text()
if page_text.strip():
page_section = f"--- Page {i} ---\n{page_text}"
# Check if adding this page would exceed limit
if total_length + len(page_section) + 2 > _MAX_TOOL_OUTPUT: # +2 for "\n\n"
# Add partial page if there's room
remaining = _MAX_TOOL_OUTPUT - total_length - 2
if remaining > 100: # Only add if we can fit meaningful content
text_parts.append(page_section[:remaining])
truncated = True
break
text_parts.append(page_section)
total_length += len(page_section) + 2
content = "\n\n".join(text_parts)
if truncated:
content += f"\n... (PDF truncated - showing first {len(text_parts)} of {len(reader.pages)} pages)"
return {
"content": [{"type": "text", "text": f"Content of {file_path} ({len(reader.pages)} pages):\n\n{content}"}]
}
elif method == "pdfplumber":
try:
import pdfplumber
except ImportError:
continue
with pdfplumber.open(path) as pdf:
text_parts = []
total_length = 0
truncated = False
total_pages = len(pdf.pages)
for i, page in enumerate(pdf.pages, 1):
page_text = page.extract_text()
if page_text and page_text.strip():
page_section = f"--- Page {i} ---\n{page_text}"
# Check if adding this page would exceed limit
if total_length + len(page_section) + 2 > _MAX_TOOL_OUTPUT:
remaining = _MAX_TOOL_OUTPUT - total_length - 2
if remaining > 100:
text_parts.append(page_section[:remaining])
truncated = True
break
text_parts.append(page_section)
total_length += len(page_section) + 2
content = "\n\n".join(text_parts)
if truncated:
content += f"\n... (PDF truncated - showing first {len(text_parts)} of {total_pages} pages)"
return {
"content": [{"type": "text", "text": f"Content of {file_path} ({total_pages} pages):\n\n{content}"}]
}
elif method == "pdfminer":
try:
from pdfminer.high_level import extract_text as pdfminer_extract
except ImportError:
continue
content = pdfminer_extract(path)
if len(content) > _MAX_TOOL_OUTPUT:
content = content[:_MAX_TOOL_OUTPUT] + "\n... (PDF truncated)"
return {
"content": [{"type": "text", "text": f"Content of {file_path}:\n\n{content}"}]
}
except Exception as e:
# If this is the last attempt, return the error
if attempt == 3:
error_msg = str(e).lower()
if "password" in error_msg or "encrypted" in error_msg:
return {
"content": [{"type": "text", "text": f"PDF appears to be password-protected: {str(e)}"}],
"isError": True
}
else:
return {
"content": [{"type": "text", "text": f"Error reading PDF after trying multiple methods: {str(e)}. The PDF might be corrupted or use an unsupported format."}],
"isError": True
}
# Otherwise, continue to next method
continue
# If we get here, no PDF library is installed
return {
"content": [{"type": "text", "text": f"Cannot read PDF: No PDF library installed. Install with: pip install pypdf pdfplumber"}],
"isError": True
}
# Non-PDF files: try reading as text
try:
content = path.read_text(encoding="utf-8")
if len(content) > _MAX_TOOL_OUTPUT:
@@ -206,6 +344,12 @@ async def read_file_tool(args: Dict[str, Any]) -> Dict[str, Any]:
return {
"content": [{"type": "text", "text": f"Content of {file_path}:\n\n{content}"}]
}
except UnicodeDecodeError:
# Binary file that's not a PDF
return {
"content": [{"type": "text", "text": f"Error: {file_path} appears to be a binary file. Only text files and PDFs are supported."}],
"isError": True
}
except Exception as e:
return {
"content": [{"type": "text", "text": f"Error reading file: {str(e)}"}],
@@ -1778,6 +1922,129 @@ async def gitea_get_tree_tool(args: Dict[str, Any]) -> Dict[str, Any]:
}
# ============================================
# Sub-Agent Delegation Tool (MCP Bridge)
# ============================================
def _delegation_error(message: str) -> Dict[str, Any]:
    """Build an MCP error payload carrying a single text message."""
    return {
        "content": [{"type": "text", "text": message}],
        "isError": True,
    }


@tool(
    name="delegate_task",
    description=(
        "Delegate a task to a specialist sub-agent. The sub-agent runs in a separate "
        "thread with its own conversation context but shares the memory workspace. "
        "Use this to parallelize work (e.g., creating multiple diagrams, researching "
        "multiple topics). Each sub-agent gets a specialist prompt defining its role. "
        "Returns the sub-agent's final response text."
    ),
    input_schema={
        "task": str,
        "specialist_prompt": str,
        "agent_id": str,
    },
)
async def delegate_task_tool(args: Dict[str, Any]) -> Dict[str, Any]:
    """Delegate a task to a specialist sub-agent via the registered main Agent.

    This MCP tool bridges the Agent SDK subprocess and the in-process
    ``Agent.delegate()`` method: it looks up the main Agent in the global
    registry and runs ``delegate()`` on the default thread-pool executor so
    the event loop is not blocked while the sub-agent works.

    NOTE(review): the original author states that ``Agent.delegate()`` is
    synchronous and serialises on ``Agent._chat_lock``, and that memory writes
    are guarded by ``MemorySystem._write_lock``. Neither is visible from this
    function - confirm against those classes before relying on it.

    Args:
        args: Tool arguments. ``task`` and ``specialist_prompt`` are required
            non-empty strings. ``agent_id`` is optional; when empty, a fresh
            unique ID is generated.

    Returns:
        An MCP result dict. On success ``content`` holds the sub-agent's
        response text (truncated to ``_MAX_TOOL_OUTPUT`` characters). On any
        failure ``isError`` is ``True`` and ``content`` describes the problem.
        This function does not raise; all exceptions are converted to error
        payloads.
    """
    # Local imports keep this block self-contained (the original also imported
    # asyncio locally).
    import asyncio
    import uuid

    task = args.get("task", "")
    specialist_prompt = args.get("specialist_prompt", "")
    agent_id = args.get("agent_id", "")

    # Validate required fields
    if not task:
        return _delegation_error("Error: 'task' is required")

    if not specialist_prompt:
        return _delegation_error(
            "Error: 'specialist_prompt' is required (defines the sub-agent role)"
        )

    # Check agent registry availability
    if not AGENT_REGISTRY_AVAILABLE:
        return _delegation_error(
            "Error: agent_registry module not available. Cannot delegate tasks."
        )

    # Get the main agent from the global registry
    agent = get_agent()
    if agent is None:
        return _delegation_error(
            "Error: No agent registered. The bot may still be starting up, "
            "or agent_registry.register_agent() was not called at startup."
        )

    # Generate agent_id if not provided. A random UUID is used instead of
    # id(args): object ids are memory addresses and get reused once the dict
    # is freed, so two unrelated delegations could end up with the same ID.
    if not agent_id:
        agent_id = f"sub_{uuid.uuid4().hex[:12]}"

    try:
        # We are inside a coroutine, so a loop is guaranteed to be running;
        # get_running_loop() is the supported way to obtain it.
        loop = asyncio.get_running_loop()

        # Run delegate in a worker thread so the blocking call does not stall
        # the event loop.
        result = await loop.run_in_executor(
            None,  # Use default thread pool
            lambda: agent.delegate(
                task=task,
                specialist_prompt=specialist_prompt,
                username="default",
                agent_id=agent_id,
                max_retries=1,
            ),
        )

        # Be tolerant of a None / non-string return so that a finished task is
        # not reported as a TypeError from len().
        result_text = "" if result is None else str(result)

        # Truncate result if too large
        if len(result_text) > _MAX_TOOL_OUTPUT:
            result_text = (
                result_text[:_MAX_TOOL_OUTPUT] + "\n... (sub-agent output truncated)"
            )

        return {
            "content": [{
                "type": "text",
                "text": f"[Sub-agent {agent_id}] Task completed:\n\n{result_text}",
            }],
        }

    # asyncio.TimeoutError is only an alias of the builtin from Python 3.11 on;
    # listing both keeps the timeout message on older interpreters.
    except (TimeoutError, asyncio.TimeoutError):
        return _delegation_error(
            f"Error: Sub-agent '{agent_id}' timed out. Task may be too complex."
        )
    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        return _delegation_error(
            f"Error delegating to sub-agent '{agent_id}': {type(e).__name__}: {str(e)}"
        )
# Create the MCP server with all tools
file_system_server = create_sdk_mcp_server(
name="file_system",
@@ -1817,5 +2084,7 @@ file_system_server = create_sdk_mcp_server(
gitea_list_files_tool,
gitea_search_code_tool,
gitea_get_tree_tool,
# Sub-agent delegation
delegate_task_tool,
]
)