Files
ajarbot/llm_interface.py

519 lines
20 KiB
Python
Raw Normal View History

"""LLM Interface - Claude API, GLM, and other models.
Supports three modes for Claude:
1. Agent SDK (v0.1.36+) - DEFAULT - Uses query() API with Pro subscription
- Set USE_AGENT_SDK=true (default)
- Model: claude-sonnet-4-5-20250929 (default for all operations)
- Optional: USE_OPUS_FOR_TOOLS=true (enables Opus for extremely intensive tasks only)
- MCP Tools: File/system tools (read_file, write_file, edit_file, list_directory, run_command)
- Traditional Tools: Google tools & weather (fall back to Direct API, requires ANTHROPIC_API_KEY)
- Flat-rate subscription cost (no per-token charges for MCP tools)
2. Direct API (pay-per-token) - Set USE_DIRECT_API=true
- Model: claude-sonnet-4-5-20250929 (cost-effective, never uses Opus)
- Requires ANTHROPIC_API_KEY in .env
- Full tool support built-in (all tools via traditional API)
3. Legacy: Local Claude Code server - Set USE_CLAUDE_CODE_SERVER=true (deprecated)
- For backward compatibility only
"""
import os
from typing import Any, Dict, List, Optional
import requests
from anthropic import Anthropic
from anthropic.types import Message, ContentBlock, TextBlock, ToolUseBlock, Usage
from usage_tracker import UsageTracker
# Try to import Agent SDK (optional dependency)
try:
from claude_agent_sdk import (
query,
UserMessage,
AssistantMessage,
SystemMessage,
ClaudeAgentOptions,
)
import anyio
AGENT_SDK_AVAILABLE = True
except ImportError:
AGENT_SDK_AVAILABLE = False
def _env_flag(name: str, default: str) -> bool:
    """Return True when env var *name* (or *default* if unset) is "true", ignoring case."""
    return os.getenv(name, default).lower() == "true"


# Environment variable holding the API key for each provider.
_API_KEY_ENV_VARS = {
    "claude": "ANTHROPIC_API_KEY",
    "glm": "GLM_API_KEY",
}

# Model used for each provider / transport.
_DEFAULT_MODELS = {
    # Direct API (pay-per-token) - Sonnet is cost-effective
    "claude": "claude-sonnet-4-5-20250929",
    # Agent SDK (flat-rate) - Sonnet for normal operations
    "claude_agent_sdk": "claude-sonnet-4-5-20250929",
    # Agent SDK, extremely intensive tasks only (flat-rate)
    "claude_agent_sdk_opus": "claude-opus-4-6",
    "glm": "glm-4-plus",
}

# Endpoints.
_GLM_BASE_URL = "https://open.bigmodel.cn/api/paas/v4/chat/completions"
_CLAUDE_CODE_SERVER_URL = os.getenv("CLAUDE_CODE_SERVER_URL", "http://localhost:8000")

# Mode selection flags.
# Priority order: USE_DIRECT_API > USE_CLAUDE_CODE_SERVER > Agent SDK (the
# default when it is installed and no other mode is explicitly enabled).
_USE_DIRECT_API = _env_flag("USE_DIRECT_API", "false")
_USE_CLAUDE_CODE_SERVER = _env_flag("USE_CLAUDE_CODE_SERVER", "false")
_USE_AGENT_SDK = _env_flag("USE_AGENT_SDK", "true")

# Opt-in switch for Opus on tool calls (only consulted in Agent SDK mode).
_USE_OPUS_FOR_TOOLS = _env_flag("USE_OPUS_FOR_TOOLS", "false")
class LLMInterface:
    """Simple LLM interface supporting Claude and GLM.

    For the ``"claude"`` provider, ``self.mode`` selects the transport:
    ``"direct_api"`` (Anthropic client), ``"legacy_server"`` (HTTP calls to
    the Claude Code server) or ``"agent_sdk"`` (``claude_agent_sdk.query``).
    Every other provider is given ``"direct_api"``; of those, only ``"glm"``
    is handled by ``chat``.
    """

    # Key under which the MCP server is registered in ClaudeAgentOptions.
    _MCP_SERVER_KEY = "file_system"
    # Tools offered to the Agent SDK (file/system + web + zettelkasten).
    _MCP_TOOL_NAMES = (
        "read_file",
        "write_file",
        "edit_file",
        "list_directory",
        "run_command",
        "web_fetch",
        "fleeting_note",
        "daily_note",
        "literature_note",
        "permanent_note",
        "search_vault",
        "search_by_tags",
    )

    def __init__(
        self,
        provider: str = "claude",
        api_key: Optional[str] = None,
        track_usage: bool = True,
    ) -> None:
        """Pick the transport mode and model, then set up the client.

        Args:
            provider: ``"claude"`` or ``"glm"``.
            api_key: Explicit API key. When falsy, the key is read from the
                provider's environment variable (see ``_API_KEY_ENV_VARS``).
            track_usage: Create a ``UsageTracker``. Only honoured when the
                resulting mode is ``"direct_api"``.
        """
        self.provider = provider
        self.api_key = api_key or os.getenv(
            _API_KEY_ENV_VARS.get(provider, ""),
        )
        self.client: Optional[Anthropic] = None

        self.mode = self._select_mode(provider)

        # Usage tracking is disabled when using the Agent SDK or legacy server.
        self.tracker = (
            UsageTracker() if (track_usage and self.mode == "direct_api") else None
        )

        # The model depends on the mode that was just chosen.
        if provider != "claude":
            self.model = _DEFAULT_MODELS.get(provider, "")
        elif self.mode == "agent_sdk":
            self.model = _DEFAULT_MODELS.get(
                "claude_agent_sdk", "claude-sonnet-4-5-20250929"
            )
        else:
            self.model = _DEFAULT_MODELS.get(provider, "claude-haiku-4-5-20251001")

        if provider == "claude":
            self._init_claude_transport()

    @staticmethod
    def _select_mode(provider: str) -> str:
        """Return the transport mode for *provider*.

        Priority for Claude: direct API > legacy server > Agent SDK, with a
        fallback to the direct API when the Agent SDK is not importable.
        """
        if provider != "claude":
            return "direct_api"  # Non-Claude providers use direct API
        if _USE_DIRECT_API:
            return "direct_api"
        if _USE_CLAUDE_CODE_SERVER:
            return "legacy_server"
        if _USE_AGENT_SDK and AGENT_SDK_AVAILABLE:
            return "agent_sdk"
        if _USE_AGENT_SDK and not AGENT_SDK_AVAILABLE:
            print("[LLM] Warning: Agent SDK not available, falling back to Direct API")
            print("[LLM] Install with: pip install claude-agent-sdk")
        return "direct_api"

    def _init_claude_transport(self) -> None:
        """Announce the selected Claude mode and create/probe its backend."""
        if self.mode == "agent_sdk":
            # No initialization needed - query() is a standalone function.
            print(f"[LLM] Using Claude Agent SDK (flat-rate subscription) with model: {self.model}")
        elif self.mode == "direct_api":
            print(f"[LLM] Using Direct API (pay-per-token) with model: {self.model}")
            self.client = Anthropic(api_key=self.api_key)
        elif self.mode == "legacy_server":
            print(f"[LLM] Using Claude Code server at {_CLAUDE_CODE_SERVER_URL} (Pro subscription) with model: {self.model}")
            # Best-effort probe: a failure is reported but never raised.
            try:
                response = requests.get(f"{_CLAUDE_CODE_SERVER_URL}/", timeout=2)
                response.raise_for_status()
                print(f"[LLM] Claude Code server is running: {response.json()}")
            except Exception as e:
                print(f"[LLM] Warning: Could not connect to Claude Code server: {e}")
                # NOTE(review): the message below says the Agent SDK is used
                # instead, but self.mode stays "legacy_server" - confirm
                # whether an actual fallback is intended.
                print("[LLM] Note: Claude Code server mode is deprecated. Using Agent SDK instead.")

    @staticmethod
    def _content_to_text(content: Any) -> str:
        """Flatten message content to plain text.

        Accepts a string, or a list of content blocks given either as dicts
        or as objects; the ``text`` of every block that has one is
        concatenated and blocks without text (tool use, thinking, ...) are
        skipped. ``None`` yields ``""``.
        """
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            parts: List[str] = []
            for block in content:
                if isinstance(block, dict):
                    text = block.get("text")
                else:
                    text = getattr(block, "text", None)
                if isinstance(text, str):
                    parts.append(text)
            return "".join(parts)
        return "" if content is None else str(content)

    @staticmethod
    def _messages_to_prompt(messages: List[Dict]) -> str:
        """Render a chat history as the single prompt string query() takes.

        A lone user message is passed through unchanged; longer histories
        become a ``"User: ..." / "Assistant: ..."`` transcript. Messages with
        any other role are skipped.
        """
        turns = [
            (msg["role"], LLMInterface._content_to_text(msg["content"]))
            for msg in messages
            if msg["role"] in ("user", "assistant")
        ]
        if len(turns) == 1 and turns[0][0] == "user":
            return turns[0][1]
        return "\n\n".join(f"{role.capitalize()}: {text}" for role, text in turns)

    def _track_usage(self, response: Any) -> None:
        """Record token usage of *response* when a tracker is active."""
        if not self.tracker or not hasattr(response, "usage"):
            return
        usage = response.usage
        self.tracker.track(
            model=self.model,
            input_tokens=usage.input_tokens,
            output_tokens=usage.output_tokens,
            # The cache counters may be absent or None; report 0 either way.
            cache_creation_tokens=getattr(usage, "cache_creation_input_tokens", 0) or 0,
            cache_read_tokens=getattr(usage, "cache_read_input_tokens", 0) or 0,
        )

    def chat(
        self,
        messages: List[Dict],
        system: Optional[str] = None,
        max_tokens: int = 16384,
    ) -> str:
        """Send chat request and get response.

        Args:
            messages: Chat history as ``{"role": ..., "content": ...}`` dicts.
            system: Optional system prompt.
            max_tokens: Output token limit (not forwarded in Agent SDK mode).

        Returns:
            The response text; ``""`` when the Direct API returns no text.

        Raises:
            ValueError: If the provider is not supported.
            Exception: If the API call fails or returns an unexpected response.
        """
        if self.provider == "claude":
            # Agent SDK mode (Pro subscription)
            if self.mode == "agent_sdk":
                try:
                    # anyio.run creates the event loop for the async SDK.
                    return anyio.run(
                        self._agent_sdk_chat, messages, system, max_tokens
                    )
                except Exception as e:
                    raise Exception(f"Agent SDK error: {e}") from e

            # Legacy Claude Code server (Pro subscription)
            if self.mode == "legacy_server":
                try:
                    payload = {
                        "messages": [
                            {"role": m["role"], "content": m["content"]}
                            for m in messages
                        ],
                        "system": system,
                        "max_tokens": max_tokens,
                    }
                    response = requests.post(
                        f"{_CLAUDE_CODE_SERVER_URL}/v1/chat",
                        json=payload,
                        timeout=120,
                    )
                    response.raise_for_status()
                    return response.json().get("content", "")
                except Exception as e:
                    raise Exception(f"Claude Code server error: {e}") from e

            # Direct API (pay-per-token)
            if self.mode == "direct_api":
                response = self.client.messages.create(
                    model=self.model,
                    max_tokens=max_tokens,
                    system=system or "",
                    messages=messages,
                )
                self._track_usage(response)
                # Join every text block: the first block is not guaranteed
                # to be text, and there may be more than one.
                return self._content_to_text(response.content)

        if self.provider == "glm":
            glm_messages = list(messages)
            if system:
                # Only send a system message when there is something in it.
                glm_messages.insert(0, {"role": "system", "content": system})
            payload = {
                "model": self.model,
                "messages": glm_messages,
                "max_tokens": max_tokens,
            }
            headers = {"Authorization": f"Bearer {self.api_key}"}
            response = requests.post(
                _GLM_BASE_URL, json=payload, headers=headers, timeout=60,
            )
            response.raise_for_status()
            return response.json()["choices"][0]["message"]["content"]

        raise ValueError(f"Unsupported provider: {self.provider}")

    def _build_agent_sdk_options(self, system: Optional[str]) -> Any:
        """Build ``ClaudeAgentOptions`` for a query, or ``None`` if none are needed.

        Carries the system prompt and, when the optional ``mcp_tools`` module
        is importable, the file/system MCP server with its allowed tools.
        """
        option_kwargs: Dict[str, Any] = {}
        if system:
            option_kwargs["system_prompt"] = system
        try:
            from mcp_tools import file_system_server
        except ImportError:
            # mcp_tools is optional; run without file/system tools.
            pass
        else:
            option_kwargs["mcp_servers"] = {self._MCP_SERVER_KEY: file_system_server}
            # Tools served by an SDK MCP server are addressed as
            # mcp__<server key>__<tool name>. The bare names are kept so the
            # previous allow-list remains a subset of this one.
            option_kwargs["allowed_tools"] = [
                *self._MCP_TOOL_NAMES,
                *(
                    f"mcp__{self._MCP_SERVER_KEY}__{name}"
                    for name in self._MCP_TOOL_NAMES
                ),
            ]
        return ClaudeAgentOptions(**option_kwargs) if option_kwargs else None

    async def _agent_sdk_chat(
        self,
        messages: List[Dict],
        system: Optional[str],
        max_tokens: int,
    ) -> str:
        """Internal async method for Agent SDK chat (called via anyio bridge).

        ``query()`` takes a keyword-only ``prompt`` plus ``options`` and
        yields messages as an async iterator, so the history is flattened
        into a prompt and the stream is consumed with ``async for``.

        Args:
            messages: Chat history as role/content dicts.
            system: Optional system prompt, passed via the SDK options.
            max_tokens: Unused; the Agent SDK handles token limits internally.

        Returns:
            The final result text when the stream reports one, otherwise the
            concatenated text of all assistant messages.
        """
        prompt = self._messages_to_prompt(messages)
        options = self._build_agent_sdk_options(system)

        final_result: Optional[str] = None
        text_parts: List[str] = []
        # The model is not passed explicitly; the SDK picks it from its settings.
        async for message in query(prompt=prompt, options=options):
            if isinstance(message, AssistantMessage):
                text_parts.append(self._content_to_text(message.content))
                continue
            # The closing result message carries the final text in `.result`.
            result = getattr(message, "result", None)
            if isinstance(result, str):
                final_result = result

        if final_result:
            return final_result
        return "".join(text_parts)

    async def _agent_sdk_chat_with_tools(
        self,
        messages: List[Dict],
        tools: List[Dict[str, Any]],
        system: Optional[str],
        max_tokens: int,
    ) -> "Message":
        """Internal async method for Agent SDK chat with tools (called via anyio bridge).

        NOTE: The new Claude Agent SDK (v0.1.36+) uses MCP servers for tools.
        For backward compatibility with the existing tool system, we fall back
        to the Direct API for tool calls. This means tool calls will consume
        API tokens even when Agent SDK mode is enabled.

        Uses ``self.model`` by default. Opus is used when
        USE_OPUS_FOR_TOOLS=true and the mode is ``"agent_sdk"``.

        Raises:
            ValueError: If no API key is available.
        """
        if not self.api_key:
            raise ValueError(
                "ANTHROPIC_API_KEY required for tool calls in Agent SDK mode. "
                "Set the API key in .env or migrate tools to MCP servers."
            )
        temp_client = Anthropic(api_key=self.api_key)

        # NOTE(review): this request goes through the Direct API, so enabling
        # Opus here is billed per token rather than flat-rate - confirm that
        # this is intended.
        if _USE_OPUS_FOR_TOOLS and self.mode == "agent_sdk":
            model = _DEFAULT_MODELS.get("claude_agent_sdk_opus", "claude-opus-4-6")
        else:
            model = self.model

        return temp_client.messages.create(
            model=model,
            max_tokens=max_tokens,
            system=system or "",
            messages=messages,
            tools=tools,
        )

    def _convert_sdk_response_to_message(self, sdk_response: Dict[str, Any]) -> "Message":
        """Convert Agent SDK response to anthropic.types.Message format.

        This ensures compatibility with agent.py's existing tool loop.

        Args:
            sdk_response: Dict with optional ``content`` (a string, or a list
                of ``text`` / ``tool_use`` block dicts), ``usage``,
                ``stop_reason``, ``model`` and ``id`` keys.

        Returns:
            A Message-like object exposing ``content``, ``stop_reason``,
            ``usage``, ``id``, ``model``, ``role`` and ``type``.
        """
        raw_content = sdk_response.get("content", [])
        content_blocks = []
        if isinstance(raw_content, str):
            # Simple text response
            content_blocks = [TextBlock(type="text", text=raw_content)]
        elif isinstance(raw_content, list):
            for block in raw_content:
                if not isinstance(block, dict):
                    continue
                block_type = block.get("type")
                if block_type == "text":
                    content_blocks.append(
                        TextBlock(type="text", text=block.get("text", ""))
                    )
                elif block_type == "tool_use":
                    content_blocks.append(
                        ToolUseBlock(
                            type="tool_use",
                            id=block.get("id", ""),
                            name=block.get("name", ""),
                            input=block.get("input", {}),
                        )
                    )

        usage_data = sdk_response.get("usage", {})
        usage = Usage(
            input_tokens=usage_data.get("input_tokens", 0),
            output_tokens=usage_data.get("output_tokens", 0),
        )

        # A minimal stand-in is built here rather than an
        # anthropic.types.Message instance.
        class MessageLike:
            def __init__(self, content, stop_reason, usage, model):
                self.content = content
                self.stop_reason = stop_reason
                self.usage = usage
                self.id = sdk_response.get("id", "sdk_message")
                self.model = model
                self.role = "assistant"
                self.type = "message"

        return MessageLike(
            content=content_blocks,
            stop_reason=sdk_response.get("stop_reason", "end_turn"),
            usage=usage,
            model=sdk_response.get("model", self.model),
        )

    def chat_with_tools(
        self,
        messages: List[Dict],
        tools: List[Dict[str, Any]],
        system: Optional[str] = None,
        max_tokens: int = 16384,
        use_cache: bool = False,
    ) -> "Message":
        """Send chat request with tool support. Returns full Message object.

        Args:
            messages: Chat history as role/content dicts.
            tools: Tool definitions forwarded to the API.
            system: Optional system prompt.
            max_tokens: Output token limit.
            use_cache: Enable prompt caching for Sonnet models (saves 90% on
                repeated context). Only applied in Direct API mode.

        Raises:
            ValueError: If the provider is not Claude or the mode is unknown.
            Exception: If the Agent SDK or legacy server call fails.
        """
        if self.provider != "claude":
            raise ValueError("Tool use only supported for Claude provider")

        # Agent SDK mode (Pro subscription)
        if self.mode == "agent_sdk":
            try:
                # anyio.run creates the event loop for the async SDK.
                return anyio.run(
                    self._agent_sdk_chat_with_tools,
                    messages,
                    tools,
                    system,
                    max_tokens,
                )
            except Exception as e:
                raise Exception(f"Agent SDK error: {e}") from e

        # Legacy Claude Code server (Pro subscription)
        if self.mode == "legacy_server":
            try:
                payload = {
                    "messages": messages,
                    "tools": tools,
                    "system": system,
                    "max_tokens": max_tokens,
                }
                response = requests.post(
                    f"{_CLAUDE_CODE_SERVER_URL}/v1/chat/tools",
                    json=payload,
                    timeout=120,
                )
                response.raise_for_status()
                data = response.json()

                # NOTE(review): `content` is passed through exactly as the
                # server returned it (raw JSON values, not block objects) -
                # confirm that callers can handle that.
                class MockMessage:
                    def __init__(self, data):
                        usage_data = data.get("usage", {})
                        self.content = data.get("content", [])
                        self.stop_reason = data.get("stop_reason", "end_turn")
                        self.usage = type("obj", (object,), {
                            "input_tokens": usage_data.get("input_tokens", 0),
                            "output_tokens": usage_data.get("output_tokens", 0),
                        })

                return MockMessage(data)
            except Exception as e:
                raise Exception(f"Claude Code server error: {e}") from e

        # Direct API (pay-per-token)
        if self.mode == "direct_api":
            # Enable caching only for Sonnet models (not worth it for Haiku).
            enable_caching = use_cache and "sonnet" in self.model.lower()
            if enable_caching and system:
                # List form is required to attach cache_control to the prompt.
                system_blocks = [
                    {
                        "type": "text",
                        "text": system,
                        "cache_control": {"type": "ephemeral"},
                    }
                ]
            else:
                system_blocks = system or ""
            response = self.client.messages.create(
                model=self.model,
                max_tokens=max_tokens,
                system=system_blocks,
                messages=messages,
                tools=tools,
            )
            self._track_usage(response)
            return response

        # Fail loudly instead of silently returning None.
        raise ValueError(f"Unsupported mode: {self.mode}")

    def set_model(self, model: str) -> None:
        """Change the active model."""
        self.model = model

    def get_usage_stats(self, target_date: Optional[str] = None) -> Dict:
        """Get usage statistics and costs.

        Args:
            target_date: Date string (YYYY-MM-DD). If None, returns today's stats.

        Returns:
            Dict with cost, token counts, and breakdown by model, or
            ``{"error": "Usage tracking not enabled"}`` when no tracker is active.
        """
        if not self.tracker:
            return {"error": "Usage tracking not enabled"}
        return self.tracker.get_daily_cost(target_date)