feat: Add Gitea MCP integration and project cleanup
## New Features

- **Gitea MCP Tools** (zero API cost):
  - gitea_read_file: Read files from homelab repo
  - gitea_list_files: Browse directories
  - gitea_search_code: Search by filename
  - gitea_get_tree: Get directory tree
- **Gitea Client** (gitea_tools/client.py): REST API wrapper with OAuth
- **Proxmox SSH Scripts** (scripts/): Homelab data collection utilities
- **Obsidian MCP Support** (obsidian_mcp.py): Advanced vault operations
- **Voice Integration Plan** (JARVIS_VOICE_INTEGRATION_PLAN.md)

## Improvements

- **Increased timeout**: 5min → 10min for complex tasks (llm_interface.py)
- **Removed Direct API fallback**: Gitea tools are MCP-only (zero cost)
- **Updated .env.example**: Added Obsidian MCP configuration
- **Enhanced .gitignore**: Protect personal memory files (SOUL.md, MEMORY.md)

## Cleanup

- Deleted 24 obsolete files (temp/test/experimental scripts, outdated docs)
- Untracked personal memory files (SOUL.md, MEMORY.md now in .gitignore)
- Removed: AGENT_SDK_IMPLEMENTATION.md, HYBRID_SEARCH_SUMMARY.md, IMPLEMENTATION_SUMMARY.md, MIGRATION.md, test_agent_sdk.py, etc.

## Configuration

- Added config/gitea_config.example.yaml (Gitea setup template)
- Added config/obsidian_mcp.example.yaml (Obsidian MCP template)
- Updated scheduled_tasks.yaml with new task examples

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
724
llm_interface.py
724
llm_interface.py
@@ -1,42 +1,49 @@
|
||||
"""LLM Interface - Claude API, GLM, and other models.
|
||||
|
||||
Supports three modes for Claude:
|
||||
1. Agent SDK (v0.1.36+) - DEFAULT - Uses query() API with Pro subscription
|
||||
Supports two modes for Claude:
|
||||
1. Agent SDK (v0.1.36+) - DEFAULT - Uses query() API with Max subscription
|
||||
- Set USE_AGENT_SDK=true (default)
|
||||
- Model: claude-sonnet-4-5-20250929 (default for all operations)
|
||||
- Optional: USE_OPUS_FOR_TOOLS=true (enables Opus for extremely intensive tasks only)
|
||||
- MCP Tools: File/system tools (read_file, write_file, edit_file, list_directory, run_command)
|
||||
- Traditional Tools: Google tools & weather (fall back to Direct API, requires ANTHROPIC_API_KEY)
|
||||
- Flat-rate subscription cost (no per-token charges for MCP tools)
|
||||
- All tools are MCP-based (no API key needed)
|
||||
- Tools registered via mcp_tools.py MCP server
|
||||
- Flat-rate subscription cost
|
||||
|
||||
2. Direct API (pay-per-token) - Set USE_DIRECT_API=true
|
||||
- Model: claude-sonnet-4-5-20250929 (cost-effective, never uses Opus)
|
||||
- Model: claude-sonnet-4-5-20250929
|
||||
- Requires ANTHROPIC_API_KEY in .env
|
||||
- Full tool support built-in (all tools via traditional API)
|
||||
|
||||
3. Legacy: Local Claude Code server - Set USE_CLAUDE_CODE_SERVER=true (deprecated)
|
||||
- For backward compatibility only
|
||||
- Uses traditional tool definitions from tools.py
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import atexit
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional
|
||||
import subprocess
|
||||
import threading
|
||||
from typing import Any, Dict, List, Optional, Set
|
||||
|
||||
import requests
|
||||
from anthropic import Anthropic
|
||||
from anthropic.types import Message, ContentBlock, TextBlock, ToolUseBlock, Usage
|
||||
|
||||
from usage_tracker import UsageTracker
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
# Ensure our debug messages are visible even if root logger is at WARNING.
|
||||
# Only add a handler if none exist (prevent duplicate output).
|
||||
if not logger.handlers:
|
||||
_handler = logging.StreamHandler()
|
||||
_handler.setFormatter(logging.Formatter(
|
||||
"%(asctime)s [%(name)s] %(levelname)s: %(message)s",
|
||||
datefmt="%H:%M:%S",
|
||||
))
|
||||
logger.addHandler(_handler)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
# Try to import Agent SDK (optional dependency)
|
||||
try:
|
||||
from claude_agent_sdk import (
|
||||
query,
|
||||
UserMessage,
|
||||
AssistantMessage,
|
||||
SystemMessage,
|
||||
ClaudeAgentOptions,
|
||||
ResultMessage,
|
||||
)
|
||||
import anyio
|
||||
AGENT_SDK_AVAILABLE = True
|
||||
except ImportError:
|
||||
AGENT_SDK_AVAILABLE = False
|
||||
@@ -47,29 +54,61 @@ _API_KEY_ENV_VARS = {
|
||||
"glm": "GLM_API_KEY",
|
||||
}
|
||||
|
||||
# Mode selection (priority order: USE_DIRECT_API > USE_CLAUDE_CODE_SERVER > default to Agent SDK)
|
||||
# Mode selection (priority: USE_DIRECT_API > default to Agent SDK)
|
||||
_USE_DIRECT_API = os.getenv("USE_DIRECT_API", "false").lower() == "true"
|
||||
_CLAUDE_CODE_SERVER_URL = os.getenv("CLAUDE_CODE_SERVER_URL", "http://localhost:8000")
|
||||
_USE_CLAUDE_CODE_SERVER = os.getenv("USE_CLAUDE_CODE_SERVER", "false").lower() == "true"
|
||||
# Agent SDK is the default if available and no other mode is explicitly enabled
|
||||
_USE_AGENT_SDK = os.getenv("USE_AGENT_SDK", "true").lower() == "true"
|
||||
|
||||
# Default models by provider
|
||||
_DEFAULT_MODELS = {
|
||||
"claude": "claude-sonnet-4-5-20250929", # For Direct API (pay-per-token) - Sonnet is cost-effective
|
||||
"claude_agent_sdk": "claude-sonnet-4-5-20250929", # For Agent SDK (flat-rate) - Sonnet for normal operations
|
||||
"claude_agent_sdk_opus": "claude-opus-4-6", # For Agent SDK extremely intensive tasks only (flat-rate)
|
||||
"claude": "claude-sonnet-4-5-20250929",
|
||||
"claude_agent_sdk": "claude-sonnet-4-5-20250929",
|
||||
"glm": "glm-4-plus",
|
||||
}
|
||||
|
||||
# When to use Opus (only on Agent SDK flat-rate mode)
|
||||
_USE_OPUS_FOR_TOOLS = os.getenv("USE_OPUS_FOR_TOOLS", "false").lower() == "true"
|
||||
|
||||
_GLM_BASE_URL = "https://open.bigmodel.cn/api/paas/v4/chat/completions"
|
||||
|
||||
# Track PIDs of claude.exe subprocesses we spawn (to avoid killing user's Claude Code session!)
|
||||
_TRACKED_CLAUDE_PIDS: Set[int] = set()
|
||||
_TRACKED_PIDS_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def _register_claude_subprocess(pid: int):
    """Remember a claude.exe subprocess PID so it can be killed at exit.

    Args:
        pid: Process ID of a subprocess spawned by this module.
    """
    # The PID set is shared between threads; mutate it only under the lock.
    with _TRACKED_PIDS_LOCK:
        _TRACKED_CLAUDE_PIDS.add(pid)
        logger.debug("[LLM] Registered claude.exe subprocess PID: %d", pid)
|
||||
|
||||
|
||||
def _cleanup_tracked_claude_processes():
    """Kill only the claude.exe processes we spawned (not the user's Claude Code session!)

    Every tracked PID is force-killed with ``taskkill /F /T`` on Windows or
    ``kill -9`` elsewhere. Each kill is best-effort: failures (including a
    non-zero exit status, e.g. because the process already exited) are
    logged at debug level and never raised.
    """
    # Snapshot and clear the set under the lock, then run the (blocking,
    # up to 2s each) kill commands without holding it.
    with _TRACKED_PIDS_LOCK:
        if not _TRACKED_CLAUDE_PIDS:
            return
        pids = list(_TRACKED_CLAUDE_PIDS)
        _TRACKED_CLAUDE_PIDS.clear()

    logger.info("[LLM] Cleaning up %d tracked claude.exe subprocess(es)", len(pids))
    for pid in pids:
        if os.name == 'nt':  # Windows
            command = ['taskkill', '/F', '/PID', str(pid), '/T']
        else:  # Linux/Mac
            command = ['kill', '-9', str(pid)]

        try:
            completed = subprocess.run(command, capture_output=True, timeout=2)
        except Exception as e:
            logger.debug("[LLM] Failed to kill PID %d: %s", pid, e)
            continue

        # subprocess.run() does not raise on a non-zero exit status, so the
        # outcome must be checked before reporting success.
        if completed.returncode == 0:
            logger.debug("[LLM] Killed claude.exe subprocess PID: %d", pid)
        else:
            logger.debug(
                "[LLM] Failed to kill PID %d: exit code %s",
                pid,
                completed.returncode,
            )
|
||||
|
||||
|
||||
# Register cleanup on exit (only kills our tracked subprocesses, not all claude.exe!)
|
||||
atexit.register(_cleanup_tracked_claude_processes)
|
||||
|
||||
|
||||
class LLMInterface:
|
||||
"""Simple LLM interface supporting Claude and GLM."""
|
||||
"""LLM interface supporting Claude (Agent SDK or Direct API) and GLM."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -82,26 +121,27 @@ class LLMInterface:
|
||||
_API_KEY_ENV_VARS.get(provider, ""),
|
||||
)
|
||||
self.client: Optional[Anthropic] = None
|
||||
# Model will be set after determining mode
|
||||
|
||||
# Determine mode (priority: direct API > legacy server > agent SDK)
|
||||
# Reference to the main asyncio event loop, set by the runtime.
|
||||
# Used by Agent SDK mode to schedule async work from worker threads
|
||||
# via asyncio.run_coroutine_threadsafe().
|
||||
self._event_loop: Optional[asyncio.AbstractEventLoop] = None
|
||||
|
||||
# Determine mode (priority: direct API > agent SDK)
|
||||
if provider == "claude":
|
||||
if _USE_DIRECT_API:
|
||||
self.mode = "direct_api"
|
||||
elif _USE_CLAUDE_CODE_SERVER:
|
||||
self.mode = "legacy_server"
|
||||
elif _USE_AGENT_SDK and AGENT_SDK_AVAILABLE:
|
||||
self.mode = "agent_sdk"
|
||||
else:
|
||||
# Fallback to direct API if Agent SDK not available
|
||||
self.mode = "direct_api"
|
||||
if _USE_AGENT_SDK and not AGENT_SDK_AVAILABLE:
|
||||
print("[LLM] Warning: Agent SDK not available, falling back to Direct API")
|
||||
print("[LLM] Install with: pip install claude-agent-sdk")
|
||||
else:
|
||||
self.mode = "direct_api" # Non-Claude providers use direct API
|
||||
self.mode = "direct_api"
|
||||
|
||||
# Usage tracking (disabled when using Agent SDK or legacy server)
|
||||
# Usage tracking (only for Direct API pay-per-token mode)
|
||||
self.tracker = UsageTracker() if (track_usage and self.mode == "direct_api") else None
|
||||
|
||||
# Set model based on mode
|
||||
@@ -109,28 +149,125 @@ class LLMInterface:
|
||||
if self.mode == "agent_sdk":
|
||||
self.model = _DEFAULT_MODELS.get("claude_agent_sdk", "claude-sonnet-4-5-20250929")
|
||||
else:
|
||||
self.model = _DEFAULT_MODELS.get(provider, "claude-haiku-4-5-20251001")
|
||||
self.model = _DEFAULT_MODELS.get(provider, "claude-sonnet-4-5-20250929")
|
||||
else:
|
||||
self.model = _DEFAULT_MODELS.get(provider, "")
|
||||
|
||||
# Initialize based on mode
|
||||
if provider == "claude":
|
||||
if self.mode == "agent_sdk":
|
||||
print(f"[LLM] Using Claude Agent SDK (flat-rate subscription) with model: {self.model}")
|
||||
# No initialization needed - query() is a standalone function
|
||||
print(f"[LLM] Using Agent SDK (Max subscription) with model: {self.model}")
|
||||
elif self.mode == "direct_api":
|
||||
print(f"[LLM] Using Direct API (pay-per-token) with model: {self.model}")
|
||||
self.client = Anthropic(api_key=self.api_key)
|
||||
elif self.mode == "legacy_server":
|
||||
print(f"[LLM] Using Claude Code server at {_CLAUDE_CODE_SERVER_URL} (Pro subscription) with model: {self.model}")
|
||||
# Verify server is running
|
||||
try:
|
||||
response = requests.get(f"{_CLAUDE_CODE_SERVER_URL}/", timeout=2)
|
||||
response.raise_for_status()
|
||||
print(f"[LLM] Claude Code server is running: {response.json()}")
|
||||
except Exception as e:
|
||||
print(f"[LLM] Warning: Could not connect to Claude Code server: {e}")
|
||||
print(f"[LLM] Note: Claude Code server mode is deprecated. Using Agent SDK instead.")
|
||||
|
||||
def set_event_loop(self, loop: asyncio.AbstractEventLoop) -> None:
    """Remember the main asyncio event loop for cross-thread scheduling.

    With the loop stored, Agent SDK async calls made from worker threads
    (created by asyncio.to_thread) can be scheduled back onto it. Call
    this from the async context that owns the loop.

    Args:
        loop: The main event loop.
    """
    self._event_loop = loop

    loop_kind = type(loop).__name__
    is_running = loop.is_running()
    logger.info("[LLM] Event loop stored: %s (running=%s)", loop_kind, is_running)
|
||||
|
||||
@staticmethod
|
||||
def _clean_claude_env() -> dict:
    """Strip Claude Code session markers out of ``os.environ``.

    The Agent SDK's SubprocessCLITransport copies os.environ into the
    child process. When the bot is started from inside a Claude Code
    session (or any environment that sets CLAUDECODE), the child
    ``claude`` CLI detects the nesting and refuses to start with:

        "Claude Code cannot be launched inside another Claude Code session."

    Returns:
        A dict of the variables that were removed (name -> previous value),
        so the caller can put them back afterwards.
    """
    # Variables that signal an active Claude Code parent session.
    # CLAUDE_CODE_ENTRYPOINT and CLAUDE_AGENT_SDK_VERSION are set by the SDK
    # itself on the child process, so dropping them from the parent is
    # safe -- the SDK will set them again.
    session_markers = (
        "CLAUDECODE",
        "CLAUDE_CODE_ENTRYPOINT",
        "CLAUDE_AGENT_SDK_VERSION",
        "CLAUDE_CODE_ENABLE_SDK_FILE_CHECKPOINTING",
    )
    removed = {
        name: os.environ.pop(name)
        for name in session_markers
        if name in os.environ
    }
    if removed:
        logger.debug("[LLM] Cleaned Claude session env vars: %s", list(removed.keys()))
    return removed
|
||||
|
||||
@staticmethod
|
||||
def _restore_claude_env(saved: dict) -> None:
|
||||
"""Restore previously removed Claude session env vars."""
|
||||
os.environ.update(saved)
|
||||
|
||||
def _run_async_from_thread(self, coro) -> Any:
    """Run an async coroutine from a synchronous worker thread.

    Uses asyncio.run_coroutine_threadsafe() to schedule the coroutine on
    the main event loop (if one was stored and is running), then blocks
    the calling thread until it finishes. This is the bridge used when
    called from an asyncio.to_thread() worker or any background thread
    (e.g., the scheduler).

    Falls back to asyncio.run() if no running event loop reference is
    available (e.g., direct script usage without the adapter runtime).

    Args:
        coro: An already-created coroutine object (not a coroutine function).

    Returns:
        Whatever the coroutine returns.

    Raises:
        TimeoutError: If the scheduled coroutine does not finish within
            600 seconds. Other exceptions raised by the coroutine propagate.
    """
    # Future.result() signals a timeout with concurrent.futures.TimeoutError.
    # That class is only an alias of the builtin TimeoutError on Python
    # 3.11+; on older interpreters a bare ``except TimeoutError`` misses it.
    import concurrent.futures

    current_thread = threading.current_thread().name
    has_loop = self._event_loop is not None
    loop_running = has_loop and self._event_loop.is_running()

    if has_loop and loop_running:
        logger.info(
            "[LLM] _run_async_from_thread: using run_coroutine_threadsafe "
            "(thread=%s, loop=%s)",
            current_thread,
            type(self._event_loop).__name__,
        )
        # Schedule on the main event loop and block this thread until done.
        # This works because:
        # 1. asyncio.to_thread() runs us in a thread pool while the main
        #    loop continues processing other tasks.
        # 2. Scheduler threads are plain daemon threads, also not blocking
        #    the main loop.
        # The coroutine executes on the main loop without deadlocking
        # because the main loop is free to run while we block here.
        future = asyncio.run_coroutine_threadsafe(coro, self._event_loop)
        try:
            # Block with 10-minute timeout to prevent hangs.
            # Complex tasks (repo analysis, multi-step operations) can take 5-8 minutes.
            logger.info("[LLM] Waiting for Agent SDK response (timeout: 600s)...")
            result = future.result(timeout=600)
            logger.info("[LLM] Agent SDK response received successfully")
            return result
        except (concurrent.futures.TimeoutError, TimeoutError) as exc:
            logger.error("[LLM] ⚠️ Agent SDK call TIMED OUT after 600 seconds!")
            future.cancel()  # Cancel the coroutine
            raise TimeoutError(
                "Agent SDK call exceeded 10 minute timeout - task may be too complex"
            ) from exc
    else:
        logger.info(
            "[LLM] _run_async_from_thread: using asyncio.run() fallback "
            "(thread=%s, has_loop=%s, loop_running=%s)",
            current_thread,
            has_loop,
            loop_running,
        )
        # Fallback: no main loop available (standalone / test usage).
        # Create a new event loop in this thread via asyncio.run().
        return asyncio.run(coro)
|
||||
|
||||
def chat(
|
||||
self,
|
||||
@@ -140,44 +277,24 @@ class LLMInterface:
|
||||
) -> str:
|
||||
"""Send chat request and get response.
|
||||
|
||||
In Agent SDK mode, this uses query() which handles MCP tools automatically.
|
||||
In Direct API mode, this is a simple messages.create() call without tools.
|
||||
|
||||
Raises:
|
||||
Exception: If the API call fails or returns an unexpected response.
|
||||
"""
|
||||
if self.provider == "claude":
|
||||
# Agent SDK mode (Pro subscription)
|
||||
if self.mode == "agent_sdk":
|
||||
try:
|
||||
# Use anyio.run to create event loop for async SDK
|
||||
response = anyio.run(
|
||||
self._agent_sdk_chat,
|
||||
messages,
|
||||
system,
|
||||
max_tokens
|
||||
logger.info("[LLM] chat: dispatching via Agent SDK")
|
||||
response = self._run_async_from_thread(
|
||||
self._agent_sdk_chat(messages, system, max_tokens)
|
||||
)
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.error("[LLM] Agent SDK error in chat(): %s", e, exc_info=True)
|
||||
raise Exception(f"Agent SDK error: {e}")
|
||||
|
||||
# Legacy Claude Code server (Pro subscription)
|
||||
elif self.mode == "legacy_server":
|
||||
try:
|
||||
payload = {
|
||||
"messages": [{"role": m["role"], "content": m["content"]} for m in messages],
|
||||
"system": system,
|
||||
"max_tokens": max_tokens
|
||||
}
|
||||
response = requests.post(
|
||||
f"{_CLAUDE_CODE_SERVER_URL}/v1/chat",
|
||||
json=payload,
|
||||
timeout=120
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
return data.get("content", "")
|
||||
except Exception as e:
|
||||
raise Exception(f"Claude Code server error: {e}")
|
||||
|
||||
# Direct API (pay-per-token)
|
||||
elif self.mode == "direct_api":
|
||||
response = self.client.messages.create(
|
||||
model=self.model,
|
||||
@@ -186,7 +303,6 @@ class LLMInterface:
|
||||
messages=messages,
|
||||
)
|
||||
|
||||
# Track usage
|
||||
if self.tracker and hasattr(response, "usage"):
|
||||
self.tracker.track(
|
||||
model=self.model,
|
||||
@@ -222,177 +338,263 @@ class LLMInterface:
|
||||
|
||||
raise ValueError(f"Unsupported provider: {self.provider}")
|
||||
|
||||
def _build_agent_sdk_options(self) -> Optional['ClaudeAgentOptions']:
    """Build Agent SDK options with MCP servers and allowed tools.

    Always registers the ``file_system`` MCP server from ``mcp_tools``.
    Adds the Obsidian MCP server and its tools when ``obsidian_mcp`` is
    importable, reports itself enabled, and passes its health check.

    Returns:
        Configured ClaudeAgentOptions, or None if mcp_tools is unavailable.
    """
    # Guard only the import: an ImportError raised later must not be
    # misreported as "mcp_tools not available".
    try:
        from mcp_tools import file_system_server
    except ImportError:
        print("[LLM] Warning: mcp_tools not available, no MCP tools will be registered")
        return None

    mcp_servers = {"file_system": file_system_server}

    # All tools registered in the MCP server
    allowed_tools = [
        # File and system tools
        "read_file",
        "write_file",
        "edit_file",
        "list_directory",
        "run_command",
        # Web tool
        "web_fetch",
        # Zettelkasten tools
        "fleeting_note",
        "daily_note",
        "literature_note",
        "permanent_note",
        "search_vault",
        "search_by_tags",
        # Weather tool
        "get_weather",
        # Google tools (Gmail, Calendar, Contacts)
        "send_email",
        "read_emails",
        "get_email",
        "read_calendar",
        "create_calendar_event",
        "search_calendar",
        "create_contact",
        "list_contacts",
        "get_contact",
        # Gitea tools (private repo access)
        "gitea_read_file",
        "gitea_list_files",
        "gitea_search_code",
        "gitea_get_tree",
    ]

    # Conditionally add Obsidian MCP server
    try:
        from obsidian_mcp import (
            is_obsidian_enabled,
            check_obsidian_health,
            get_obsidian_server_config,
            OBSIDIAN_TOOLS,
        )

        if is_obsidian_enabled() and check_obsidian_health():
            mcp_servers["obsidian"] = get_obsidian_server_config()
            # Report the number of tools actually added rather than a
            # hard-coded count that goes stale when OBSIDIAN_TOOLS changes.
            count_before = len(allowed_tools)
            allowed_tools.extend(OBSIDIAN_TOOLS)
            added = len(allowed_tools) - count_before
            print(f"[LLM] Obsidian MCP server registered ({added} tools)")
        elif is_obsidian_enabled():
            print("[LLM] Obsidian MCP enabled but health check failed")
    except ImportError:
        # obsidian_mcp is optional; without it only the base tools are used.
        pass
    except Exception as e:
        print(f"[LLM] Obsidian MCP unavailable: {e}")

    def _stderr_callback(line: str) -> None:
        """Log Claude CLI stderr for debugging transport failures."""
        logger.debug("[CLI stderr] %s", line)

    return ClaudeAgentOptions(
        mcp_servers=mcp_servers,
        allowed_tools=allowed_tools,
        permission_mode="bypassPermissions",
        max_turns=30,  # Prevent infinite tool loops (matches MAX_TOOL_ITERATIONS)
        stderr=_stderr_callback,
    )
|
||||
|
||||
async def _agent_sdk_chat(
|
||||
self,
|
||||
messages: List[Dict],
|
||||
system: Optional[str],
|
||||
max_tokens: int
|
||||
) -> str:
|
||||
"""Internal async method for Agent SDK chat (called via anyio bridge)."""
|
||||
# Convert messages to SDK format
|
||||
sdk_messages = []
|
||||
for msg in messages:
|
||||
if msg["role"] == "user":
|
||||
sdk_messages.append(UserMessage(content=msg["content"]))
|
||||
elif msg["role"] == "assistant":
|
||||
sdk_messages.append(AssistantMessage(content=msg["content"]))
|
||||
"""Agent SDK chat via custom transport flow.
|
||||
|
||||
# Add system message if provided
|
||||
if system:
|
||||
sdk_messages.insert(0, SystemMessage(content=system))
|
||||
Uses the SDK's transport and query layers directly instead of the
|
||||
high-level ``query()`` helper. This works around a bug in
|
||||
``claude_agent_sdk._internal.client.process_query`` where
|
||||
``end_input()`` is called immediately after sending the user message
|
||||
for string prompts. That premature stdin close kills the
|
||||
bidirectional control channel that SDK MCP servers need to handle
|
||||
``tools/list`` and ``tools/call`` requests from the CLI subprocess,
|
||||
resulting in ``CLIConnectionError: ProcessTransport is not ready for
|
||||
writing``.
|
||||
|
||||
# Configure MCP server for file/system tools
|
||||
try:
|
||||
from mcp_tools import file_system_server
|
||||
Our fix: defer ``end_input()`` until after the first ``ResultMessage``
|
||||
is received, matching the logic already present in
|
||||
``Query.stream_input()`` for async-iterable prompts.
|
||||
"""
|
||||
import json as _json
|
||||
|
||||
options = ClaudeAgentOptions(
|
||||
mcp_servers={"file_system": file_system_server},
|
||||
# Allow all MCP tools (file/system + web + zettelkasten)
|
||||
allowed_tools=[
|
||||
"read_file",
|
||||
"write_file",
|
||||
"edit_file",
|
||||
"list_directory",
|
||||
"run_command",
|
||||
"web_fetch",
|
||||
"fleeting_note",
|
||||
"daily_note",
|
||||
"literature_note",
|
||||
"permanent_note",
|
||||
"search_vault",
|
||||
"search_by_tags",
|
||||
],
|
||||
)
|
||||
except ImportError:
|
||||
# Fallback if mcp_tools not available
|
||||
options = None
|
||||
|
||||
# Call the new query() API
|
||||
# Note: Agent SDK handles max_tokens internally, don't pass it explicitly
|
||||
response = await query(
|
||||
messages=sdk_messages,
|
||||
options=options,
|
||||
# model parameter is handled by the SDK based on settings
|
||||
# Lazy imports from SDK internals.
|
||||
from claude_agent_sdk._internal.transport.subprocess_cli import (
|
||||
SubprocessCLITransport,
|
||||
)
|
||||
from claude_agent_sdk._internal.query import Query
|
||||
from claude_agent_sdk._internal.message_parser import parse_message
|
||||
|
||||
# Extract text from response
|
||||
if hasattr(response, "content"):
|
||||
# Handle list of content blocks
|
||||
if isinstance(response.content, list):
|
||||
text_parts = []
|
||||
for block in response.content:
|
||||
if hasattr(block, "text"):
|
||||
text_parts.append(block.text)
|
||||
return "".join(text_parts)
|
||||
# Handle single text content
|
||||
elif isinstance(response.content, str):
|
||||
return response.content
|
||||
# Build the prompt from the system prompt and conversation history.
|
||||
prompt = self._build_sdk_prompt(messages, system)
|
||||
options = self._build_agent_sdk_options()
|
||||
|
||||
return str(response)
|
||||
# Clean Claude session env vars so the child CLI process doesn't
|
||||
# detect a "nested session" and refuse to start.
|
||||
saved_env = self._clean_claude_env()
|
||||
|
||||
async def _agent_sdk_chat_with_tools(
|
||||
try:
|
||||
# --- 1. Create and connect the subprocess transport. ---
|
||||
transport = SubprocessCLITransport(prompt=prompt, options=options)
|
||||
await transport.connect()
|
||||
|
||||
# Track the subprocess PID for cleanup on exit
|
||||
if hasattr(transport, '_process') and transport._process:
|
||||
_register_claude_subprocess(transport._process.pid)
|
||||
|
||||
# --- 2. Extract in-process SDK MCP server instances. ---
|
||||
sdk_mcp_servers: Dict = {}
|
||||
if options.mcp_servers and isinstance(options.mcp_servers, dict):
|
||||
for name, config in options.mcp_servers.items():
|
||||
if isinstance(config, dict) and config.get("type") == "sdk":
|
||||
sdk_mcp_servers[name] = config["instance"]
|
||||
|
||||
# --- 3. Create the Query object (control-protocol handler). ---
|
||||
query_obj = Query(
|
||||
transport=transport,
|
||||
is_streaming_mode=True,
|
||||
sdk_mcp_servers=sdk_mcp_servers,
|
||||
)
|
||||
|
||||
try:
|
||||
# Start the background reader task.
|
||||
await query_obj.start()
|
||||
|
||||
# Perform the initialize handshake with the CLI.
|
||||
await query_obj.initialize()
|
||||
|
||||
# Send the user message over stdin.
|
||||
user_msg = {
|
||||
"type": "user",
|
||||
"session_id": "",
|
||||
"message": {"role": "user", "content": prompt},
|
||||
"parent_tool_use_id": None,
|
||||
}
|
||||
await transport.write(_json.dumps(user_msg) + "\n")
|
||||
|
||||
# **KEY FIX**: Do NOT call end_input() yet. The CLI will
|
||||
# send MCP control requests (tools/list, tools/call) over
|
||||
# the bidirectional channel. Closing stdin now would
|
||||
# prevent us from writing responses back. We wait for the
|
||||
# first ResultMessage instead.
|
||||
|
||||
# --- 4. Consume messages until we get a ResultMessage. ---
|
||||
result_text = ""
|
||||
message_count = 0
|
||||
async for data in query_obj.receive_messages():
|
||||
message = parse_message(data)
|
||||
message_count += 1
|
||||
|
||||
# Log all message types for debugging hangs
|
||||
message_type = type(message).__name__
|
||||
logger.debug(f"[LLM] Received message #{message_count}: {message_type}")
|
||||
|
||||
if isinstance(message, ResultMessage):
|
||||
result_text = message.result or ""
|
||||
logger.info(
|
||||
"[LLM] Agent SDK result received after %d messages: cost=$%.4f, turns=%s",
|
||||
message_count,
|
||||
getattr(message, "total_cost_usd", 0),
|
||||
getattr(message, "num_turns", "?"),
|
||||
)
|
||||
break
|
||||
|
||||
# Log non-result messages to detect loops
|
||||
if message_count % 10 == 0:
|
||||
logger.warning(f"[LLM] Still waiting for ResultMessage after {message_count} messages...")
|
||||
|
||||
# Now that we have the result, close stdin gracefully.
|
||||
try:
|
||||
await transport.end_input()
|
||||
except Exception:
|
||||
pass # Process may have already exited; that's fine.
|
||||
|
||||
return result_text
|
||||
|
||||
finally:
|
||||
# Always clean up the query/transport.
|
||||
try:
|
||||
await query_obj.close()
|
||||
except Exception:
|
||||
# Suppress errors during cleanup (e.g. if process
|
||||
# already exited and there are pending control
|
||||
# request tasks that can't write back).
|
||||
pass
|
||||
finally:
|
||||
# Always restore env vars, even on error.
|
||||
self._restore_claude_env(saved_env)
|
||||
|
||||
def _build_sdk_prompt(
|
||||
self,
|
||||
messages: List[Dict],
|
||||
tools: List[Dict[str, Any]],
|
||||
system: Optional[str],
|
||||
max_tokens: int
|
||||
) -> Message:
|
||||
"""Internal async method for Agent SDK chat with tools (called via anyio bridge).
|
||||
) -> str:
|
||||
"""Build a prompt string for the Agent SDK query() from conversation history.
|
||||
|
||||
NOTE: The new Claude Agent SDK (v0.1.36+) uses MCP servers for tools.
|
||||
For backward compatibility with the existing tool system, we fall back
|
||||
to the Direct API for tool calls. This means tool calls will consume API tokens
|
||||
even when Agent SDK mode is enabled.
|
||||
|
||||
Uses Sonnet by default. Opus can be enabled via USE_OPUS_FOR_TOOLS=true for
|
||||
extremely intensive tasks (only recommended for Agent SDK flat-rate mode).
|
||||
The SDK expects a single prompt string. We combine the system prompt
|
||||
and conversation history into a coherent prompt.
|
||||
"""
|
||||
# Fallback to Direct API for tool calls (SDK tools use MCP servers)
|
||||
from anthropic import Anthropic
|
||||
parts = []
|
||||
|
||||
if not self.api_key:
|
||||
raise ValueError(
|
||||
"ANTHROPIC_API_KEY required for tool calls in Agent SDK mode. "
|
||||
"Set the API key in .env or migrate tools to MCP servers."
|
||||
)
|
||||
if system:
|
||||
parts.append(f"<system>\n{system}\n</system>\n")
|
||||
|
||||
temp_client = Anthropic(api_key=self.api_key)
|
||||
# Include recent conversation history for context
|
||||
for msg in messages:
|
||||
content = msg.get("content", "")
|
||||
role = msg["role"]
|
||||
|
||||
# Use Opus only if explicitly enabled (for intensive tasks on flat-rate)
|
||||
# Otherwise default to Sonnet (cost-effective for normal tool operations)
|
||||
if _USE_OPUS_FOR_TOOLS and self.mode == "agent_sdk":
|
||||
model = _DEFAULT_MODELS.get("claude_agent_sdk_opus", "claude-opus-4-6")
|
||||
else:
|
||||
model = self.model # Use Sonnet (default)
|
||||
if isinstance(content, str):
|
||||
if role == "user":
|
||||
parts.append(f"User: {content}")
|
||||
elif role == "assistant":
|
||||
parts.append(f"Assistant: {content}")
|
||||
elif isinstance(content, list):
|
||||
# Structured content (tool_use/tool_result blocks from Direct API history)
|
||||
text_parts = []
|
||||
for block in content:
|
||||
if isinstance(block, dict):
|
||||
if block.get("type") == "text":
|
||||
text_parts.append(block.get("text", ""))
|
||||
elif block.get("type") == "tool_result":
|
||||
text_parts.append(f"[Tool result]: {block.get('content', '')}")
|
||||
elif block.get("type") == "tool_use":
|
||||
text_parts.append(f"[Used tool: {block.get('name', 'unknown')}]")
|
||||
elif hasattr(block, "type"):
|
||||
if block.type == "text":
|
||||
text_parts.append(block.text)
|
||||
if text_parts:
|
||||
if role == "user":
|
||||
parts.append(f"User: {' '.join(text_parts)}")
|
||||
elif role == "assistant":
|
||||
parts.append(f"Assistant: {' '.join(text_parts)}")
|
||||
|
||||
response = temp_client.messages.create(
|
||||
model=model,
|
||||
max_tokens=max_tokens,
|
||||
system=system or "",
|
||||
messages=messages,
|
||||
tools=tools,
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
def _convert_sdk_response_to_message(self, sdk_response: Dict[str, Any]) -> Message:
    """Convert an Agent SDK response dict into a Message-like object.

    This ensures compatibility with agent.py's existing tool loop, which
    reads attributes such as ``content``, ``stop_reason`` and ``usage``.

    Args:
        sdk_response: Response dictionary. The keys read here are
            ``content`` (a plain string or a list of block dicts),
            ``usage``, ``stop_reason``, ``model`` and ``id``; all are
            optional and fall back to defaults.

    Returns:
        A duck-typed object exposing ``content``, ``stop_reason``,
        ``usage``, ``id``, ``model``, ``role`` and ``type``. It is not a
        real ``anthropic.types.Message`` instance.
    """
    # Extract content blocks. A bare string is treated as one text block;
    # a list is scanned for dict entries of type "text" or "tool_use".
    # Anything else (non-dict entries, other block types) is ignored.
    content_blocks = []
    raw_content = sdk_response.get("content", [])

    if isinstance(raw_content, str):
        content_blocks.append(TextBlock(type="text", text=raw_content))
    elif isinstance(raw_content, list):
        for block in raw_content:
            if not isinstance(block, dict):
                continue
            block_type = block.get("type")
            if block_type == "text":
                content_blocks.append(TextBlock(
                    type="text",
                    text=block.get("text", ""),
                ))
            elif block_type == "tool_use":
                content_blocks.append(ToolUseBlock(
                    type="tool_use",
                    id=block.get("id", ""),
                    name=block.get("name", ""),
                    input=block.get("input", {}),
                ))

    # Extract usage information. ``or {}`` also covers an explicit
    # ``"usage": None``, which ``.get("usage", {})`` would pass through
    # and then crash on the attribute lookups below.
    usage_data = sdk_response.get("usage") or {}
    usage = Usage(
        input_tokens=usage_data.get("input_tokens", 0),
        output_tokens=usage_data.get("output_tokens", 0),
    )

    # The original author notes that anthropic's Message class is
    # read-only, so a minimal stand-in carrying the same attribute names
    # is built instead. Every value is passed in explicitly so the class
    # does not depend on variables captured from the enclosing scope.
    class MessageLike:
        def __init__(self, content, stop_reason, usage, model, message_id):
            self.content = content
            self.stop_reason = stop_reason
            self.usage = usage
            self.id = message_id
            self.model = model
            self.role = "assistant"
            self.type = "message"

    return MessageLike(
        content=content_blocks,
        stop_reason=sdk_response.get("stop_reason", "end_turn"),
        usage=usage,
        model=sdk_response.get("model", self.model),
        message_id=sdk_response.get("id", "sdk_message"),
    )
|
||||
return "\n\n".join(parts)
|
||||
|
||||
def chat_with_tools(
|
||||
self,
|
||||
@@ -401,70 +603,43 @@ class LLMInterface:
|
||||
system: Optional[str] = None,
|
||||
max_tokens: int = 16384,
|
||||
use_cache: bool = False,
|
||||
) -> Message:
|
||||
"""Send chat request with tool support. Returns full Message object.
|
||||
) -> Any:
|
||||
"""Send chat request with tool support.
|
||||
|
||||
In Agent SDK mode: Uses query() with MCP tools. The SDK handles tool
|
||||
execution automatically. Returns a string (final response after all
|
||||
tool calls are resolved).
|
||||
|
||||
In Direct API mode: Returns an anthropic Message object with potential
|
||||
tool_use blocks that agent.py processes in a manual loop.
|
||||
|
||||
Args:
|
||||
use_cache: Enable prompt caching for Sonnet models (saves 90% on repeated context)
|
||||
tools: Tool definitions (used by Direct API; ignored in Agent SDK mode
|
||||
since tools are registered via MCP servers).
|
||||
use_cache: Enable prompt caching for Sonnet (Direct API only).
|
||||
"""
|
||||
if self.provider != "claude":
|
||||
raise ValueError("Tool use only supported for Claude provider")
|
||||
|
||||
# Agent SDK mode (Pro subscription)
|
||||
if self.mode == "agent_sdk":
|
||||
# Agent SDK handles tool calls automatically via MCP servers.
|
||||
# We use the same query() path as chat(), since MCP tools are
|
||||
# already registered. The SDK will invoke tools, collect results,
|
||||
# and return the final text response.
|
||||
try:
|
||||
# Use anyio.run to create event loop for async SDK
|
||||
response = anyio.run(
|
||||
self._agent_sdk_chat_with_tools,
|
||||
messages,
|
||||
tools,
|
||||
system,
|
||||
max_tokens
|
||||
logger.info("[LLM] chat_with_tools: dispatching via Agent SDK")
|
||||
response = self._run_async_from_thread(
|
||||
self._agent_sdk_chat(messages, system, max_tokens)
|
||||
)
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.error("[LLM] Agent SDK error: %s", e, exc_info=True)
|
||||
raise Exception(f"Agent SDK error: {e}")
|
||||
|
||||
# Legacy Claude Code server (Pro subscription)
|
||||
elif self.mode == "legacy_server":
|
||||
try:
|
||||
payload = {
|
||||
"messages": messages,
|
||||
"tools": tools,
|
||||
"system": system,
|
||||
"max_tokens": max_tokens
|
||||
}
|
||||
response = requests.post(
|
||||
f"{_CLAUDE_CODE_SERVER_URL}/v1/chat/tools",
|
||||
json=payload,
|
||||
timeout=120
|
||||
)
|
||||
response.raise_for_status()
|
||||
# Convert response to Message-like object
|
||||
data = response.json()
|
||||
|
||||
# Create a mock Message object with the response
|
||||
class MockMessage:
|
||||
def __init__(self, data):
|
||||
self.content = data.get("content", [])
|
||||
self.stop_reason = data.get("stop_reason", "end_turn")
|
||||
self.usage = type('obj', (object,), {
|
||||
'input_tokens': data.get("usage", {}).get("input_tokens", 0),
|
||||
'output_tokens': data.get("usage", {}).get("output_tokens", 0)
|
||||
})
|
||||
|
||||
return MockMessage(data)
|
||||
except Exception as e:
|
||||
raise Exception(f"Claude Code server error: {e}")
|
||||
|
||||
# Direct API (pay-per-token)
|
||||
elif self.mode == "direct_api":
|
||||
# Enable caching only for Sonnet models (not worth it for Haiku)
|
||||
enable_caching = use_cache and "sonnet" in self.model.lower()
|
||||
|
||||
# Structure system prompt for optimal caching
|
||||
if enable_caching and system:
|
||||
# Convert string to list format with cache control
|
||||
system_blocks = [
|
||||
{
|
||||
"type": "text",
|
||||
@@ -483,7 +658,6 @@ class LLMInterface:
|
||||
tools=tools,
|
||||
)
|
||||
|
||||
# Track usage
|
||||
if self.tracker and hasattr(response, "usage"):
|
||||
self.tracker.track(
|
||||
model=self.model,
|
||||
|
||||
Reference in New Issue
Block a user