2026-02-15 09:57:39 -07:00
|
|
|
"""LLM Interface - Claude API, GLM, and other models.
|
|
|
|
|
|
|
|
|
|
Supports three modes for Claude:
|
2026-02-16 07:43:31 -07:00
|
|
|
1. Agent SDK (v0.1.36+) - DEFAULT - Uses query() API with Pro subscription
|
|
|
|
|
- Set USE_AGENT_SDK=true (default)
|
|
|
|
|
- Model: claude-sonnet-4-5-20250929 (default for all operations)
|
|
|
|
|
- Optional: USE_OPUS_FOR_TOOLS=true (enables Opus for extremely intensive tasks only)
|
|
|
|
|
- MCP Tools: File/system tools (read_file, write_file, edit_file, list_directory, run_command)
|
|
|
|
|
- Traditional Tools: Google tools & weather (fall back to Direct API, requires ANTHROPIC_API_KEY)
|
|
|
|
|
- Flat-rate subscription cost (no per-token charges for MCP tools)
|
|
|
|
|
|
2026-02-15 09:57:39 -07:00
|
|
|
2. Direct API (pay-per-token) - Set USE_DIRECT_API=true
|
2026-02-16 07:43:31 -07:00
|
|
|
- Model: claude-sonnet-4-5-20250929 (cost-effective, never uses Opus)
|
|
|
|
|
- Requires ANTHROPIC_API_KEY in .env
|
|
|
|
|
- Full tool support built-in (all tools via traditional API)
|
|
|
|
|
|
2026-02-15 09:57:39 -07:00
|
|
|
3. Legacy: Local Claude Code server - Set USE_CLAUDE_CODE_SERVER=true (deprecated)
|
2026-02-16 07:43:31 -07:00
|
|
|
- For backward compatibility only
|
2026-02-15 09:57:39 -07:00
|
|
|
"""
|
2026-02-13 19:06:28 -07:00
|
|
|
|
|
|
|
|
import os
|
|
|
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
from anthropic import Anthropic
|
2026-02-15 09:57:39 -07:00
|
|
|
from anthropic.types import Message, ContentBlock, TextBlock, ToolUseBlock, Usage
|
2026-02-13 19:06:28 -07:00
|
|
|
|
2026-02-13 23:38:44 -07:00
|
|
|
from usage_tracker import UsageTracker
|
|
|
|
|
|
2026-02-15 09:57:39 -07:00
|
|
|
# Try to import Agent SDK (optional dependency)
|
|
|
|
|
try:
|
2026-02-16 07:43:31 -07:00
|
|
|
from claude_agent_sdk import (
|
|
|
|
|
query,
|
|
|
|
|
UserMessage,
|
|
|
|
|
AssistantMessage,
|
|
|
|
|
SystemMessage,
|
|
|
|
|
ClaudeAgentOptions,
|
|
|
|
|
)
|
2026-02-15 09:57:39 -07:00
|
|
|
import anyio
|
|
|
|
|
AGENT_SDK_AVAILABLE = True
|
|
|
|
|
except ImportError:
|
|
|
|
|
AGENT_SDK_AVAILABLE = False
|
|
|
|
|
|
2026-02-13 19:06:28 -07:00
|
|
|
def _env_flag(name: str, default: str) -> bool:
    """Return True when environment variable *name* (or *default*) equals "true", case-insensitively."""
    return os.getenv(name, default).lower() == "true"


# Environment variable that holds the API key for each provider.
_API_KEY_ENV_VARS = dict(
    claude="ANTHROPIC_API_KEY",
    glm="GLM_API_KEY",
)

# Mode switches, checked in this priority order by LLMInterface:
# USE_DIRECT_API > USE_CLAUDE_CODE_SERVER > Agent SDK (the default).
_USE_DIRECT_API = _env_flag("USE_DIRECT_API", "false")
_CLAUDE_CODE_SERVER_URL = os.getenv("CLAUDE_CODE_SERVER_URL", "http://localhost:8000")
_USE_CLAUDE_CODE_SERVER = _env_flag("USE_CLAUDE_CODE_SERVER", "false")
# The Agent SDK is used when it is importable and no other mode was requested.
_USE_AGENT_SDK = _env_flag("USE_AGENT_SDK", "true")

# Default model identifier per provider / mode.
_DEFAULT_MODELS = {
    # Direct API (pay-per-token): Sonnet is the cost-effective choice.
    "claude": "claude-sonnet-4-5-20250929",
    # Agent SDK (flat-rate): Sonnet for normal operations.
    "claude_agent_sdk": "claude-sonnet-4-5-20250929",
    # Agent SDK (flat-rate): Opus, reserved for extremely intensive tasks.
    "claude_agent_sdk_opus": "claude-opus-4-6",
    "glm": "glm-4-plus",
}

# Opt-in switch for Opus; only honoured in Agent SDK (flat-rate) mode.
_USE_OPUS_FOR_TOOLS = _env_flag("USE_OPUS_FOR_TOOLS", "false")

_GLM_BASE_URL = "https://open.bigmodel.cn/api/paas/v4/chat/completions"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class LLMInterface:
    """Simple LLM interface supporting Claude and GLM.

    For the ``"claude"`` provider one of three modes is chosen at
    construction time and stored in ``self.mode``:

    * ``"direct_api"``    - calls the Anthropic Messages API through ``self.client``.
    * ``"legacy_server"`` - posts to the HTTP server at ``CLAUDE_CODE_SERVER_URL``.
    * ``"agent_sdk"``     - uses the optional ``claude_agent_sdk`` package.

    Every other provider is assigned ``"direct_api"``.
    """

    def __init__(
        self,
        provider: str = "claude",
        api_key: Optional[str] = None,
        track_usage: bool = True,
    ) -> None:
        """Pick the operating mode and default model, and create the client if needed.

        Args:
            provider: Provider key. ``"claude"`` and ``"glm"`` are the ones
                ``chat()`` knows how to serve.
            api_key: Explicit API key. When omitted, it is read from the
                environment variable listed for the provider in
                ``_API_KEY_ENV_VARS``.
            track_usage: Create a ``UsageTracker`` when true. Tracking is
                only active in ``"direct_api"`` mode.
        """
        self.provider = provider
        self.api_key = api_key or os.getenv(
            _API_KEY_ENV_VARS.get(provider, ""),
        )
        self.client: Optional[Anthropic] = None

        # Determine mode (priority: direct API > legacy server > agent SDK).
        if provider != "claude":
            # Non-Claude providers use direct API.
            self.mode = "direct_api"
        elif _USE_DIRECT_API:
            self.mode = "direct_api"
        elif _USE_CLAUDE_CODE_SERVER:
            self.mode = "legacy_server"
        elif _USE_AGENT_SDK and AGENT_SDK_AVAILABLE:
            self.mode = "agent_sdk"
        else:
            # Fall back to the direct API if the Agent SDK cannot be used.
            self.mode = "direct_api"
            if _USE_AGENT_SDK and not AGENT_SDK_AVAILABLE:
                print("[LLM] Warning: Agent SDK not available, falling back to Direct API")
                print("[LLM] Install with: pip install claude-agent-sdk")

        # Usage tracking (disabled when using Agent SDK or legacy server).
        tracking_enabled = track_usage and self.mode == "direct_api"
        self.tracker = UsageTracker() if tracking_enabled else None

        # The default model depends on both the provider and the mode.
        if provider == "claude":
            if self.mode == "agent_sdk":
                self.model = _DEFAULT_MODELS.get("claude_agent_sdk", "claude-sonnet-4-5-20250929")
            else:
                self.model = _DEFAULT_MODELS.get(provider, "claude-haiku-4-5-20251001")
        else:
            self.model = _DEFAULT_MODELS.get(provider, "")

        # Mode-specific initialisation only applies to Claude.
        if provider != "claude":
            return

        if self.mode == "agent_sdk":
            # No initialisation needed - query() is a standalone function.
            print(f"[LLM] Using Claude Agent SDK (flat-rate subscription) with model: {self.model}")
        elif self.mode == "direct_api":
            print(f"[LLM] Using Direct API (pay-per-token) with model: {self.model}")
            self.client = Anthropic(api_key=self.api_key)
        elif self.mode == "legacy_server":
            print(f"[LLM] Using Claude Code server at {_CLAUDE_CODE_SERVER_URL} (Pro subscription) with model: {self.model}")
            # Best-effort probe: a failure is only reported, never raised.
            try:
                response = requests.get(f"{_CLAUDE_CODE_SERVER_URL}/", timeout=2)
                response.raise_for_status()
                print(f"[LLM] Claude Code server is running: {response.json()}")
            except Exception as e:
                print(f"[LLM] Warning: Could not connect to Claude Code server: {e}")
                # NOTE(review): self.mode stays "legacy_server" here even
                # though the message says the Agent SDK is used instead.
                print("[LLM] Note: Claude Code server mode is deprecated. Using Agent SDK instead.")

    @staticmethod
    def _join_text_blocks(blocks: Any) -> str:
        """Concatenate the ``text`` of every block in *blocks* that has one."""
        return "".join(block.text for block in blocks if hasattr(block, "text"))

    def _track_usage(self, response: Any) -> None:
        """Forward token counts from *response* to the tracker, if tracking is on."""
        if not (self.tracker and hasattr(response, "usage")):
            return
        usage = response.usage
        self.tracker.track(
            model=self.model,
            input_tokens=usage.input_tokens,
            output_tokens=usage.output_tokens,
            # ``or 0`` also covers a cache field that is present but None.
            cache_creation_tokens=getattr(usage, "cache_creation_input_tokens", 0) or 0,
            cache_read_tokens=getattr(usage, "cache_read_input_tokens", 0) or 0,
        )

    def chat(
        self,
        messages: List[Dict],
        system: Optional[str] = None,
        max_tokens: int = 16384,
    ) -> str:
        """Send chat request and get response.

        Args:
            messages: Conversation as dicts with ``"role"`` and ``"content"``.
            system: Optional system prompt.
            max_tokens: Upper bound on generated tokens (not forwarded in
                Agent SDK mode).

        Returns:
            The response text. In direct API mode this is the concatenation
            of all text blocks, or ``""`` when the response has no content.

        Raises:
            Exception: If the API call fails or returns an unexpected
                response. Agent SDK and legacy server failures are wrapped,
                with the original error attached as ``__cause__``.
            ValueError: If the provider (or Claude mode) is not supported.
        """
        if self.provider == "claude":
            # Agent SDK mode (Pro subscription).
            if self.mode == "agent_sdk":
                try:
                    # anyio.run creates the event loop for the async SDK call.
                    return anyio.run(
                        self._agent_sdk_chat,
                        messages,
                        system,
                        max_tokens,
                    )
                except Exception as e:
                    raise Exception(f"Agent SDK error: {e}") from e

            # Legacy Claude Code server (Pro subscription).
            elif self.mode == "legacy_server":
                try:
                    payload = {
                        "messages": [{"role": m["role"], "content": m["content"]} for m in messages],
                        "system": system,
                        "max_tokens": max_tokens,
                    }
                    response = requests.post(
                        f"{_CLAUDE_CODE_SERVER_URL}/v1/chat",
                        json=payload,
                        timeout=120,
                    )
                    response.raise_for_status()
                    return response.json().get("content", "")
                except Exception as e:
                    raise Exception(f"Claude Code server error: {e}") from e

            # Direct API (pay-per-token).
            elif self.mode == "direct_api":
                response = self.client.messages.create(
                    model=self.model,
                    max_tokens=max_tokens,
                    system=system or "",
                    messages=messages,
                )
                self._track_usage(response)

                if not response.content:
                    return ""
                # The first block is not guaranteed to be text, and there may
                # be several text blocks, so collect every one that has text.
                return self._join_text_blocks(response.content)

        if self.provider == "glm":
            glm_messages = list(messages)
            if system:
                # Only send a system message when there is one to send.
                glm_messages.insert(0, {"role": "system", "content": system})
            payload = {
                "model": self.model,
                "messages": glm_messages,
                "max_tokens": max_tokens,
            }
            headers = {"Authorization": f"Bearer {self.api_key}"}
            response = requests.post(
                _GLM_BASE_URL, json=payload, headers=headers,
                timeout=60,
            )
            response.raise_for_status()
            return response.json()["choices"][0]["message"]["content"]

        raise ValueError(f"Unsupported provider: {self.provider}")

    async def _agent_sdk_chat(
        self,
        messages: List[Dict],
        system: Optional[str],
        max_tokens: int
    ) -> str:
        """Internal async method for Agent SDK chat (called via anyio bridge).

        ``max_tokens`` is accepted for signature parity with ``chat()`` but is
        not passed to the SDK.
        """
        # Convert messages to SDK format; roles other than user/assistant are skipped.
        sdk_messages = []
        for msg in messages:
            if msg["role"] == "user":
                sdk_messages.append(UserMessage(content=msg["content"]))
            elif msg["role"] == "assistant":
                sdk_messages.append(AssistantMessage(content=msg["content"]))

        # The system prompt, when given, goes first.
        if system:
            sdk_messages.insert(0, SystemMessage(content=system))

        # Configure MCP server for file/system tools.
        try:
            from mcp_tools import file_system_server

            options = ClaudeAgentOptions(
                mcp_servers={"file_system": file_system_server},
                # Allow all MCP tools (file/system + web + zettelkasten).
                allowed_tools=[
                    "read_file",
                    "write_file",
                    "edit_file",
                    "list_directory",
                    "run_command",
                    "web_fetch",
                    "fleeting_note",
                    "daily_note",
                    "literature_note",
                    "permanent_note",
                    "search_vault",
                    "search_by_tags",
                ],
            )
        except ImportError:
            # Fallback if mcp_tools not available.
            options = None

        # NOTE(review): the published claude-agent-sdk reference describes
        # query() as an async iterator taking ``prompt=`` and ``options=``;
        # this call awaits it and passes ``messages=``. Confirm against the
        # installed SDK version before relying on Agent SDK mode.
        response = await query(
            messages=sdk_messages,
            options=options,
            # The model parameter is handled by the SDK based on settings.
        )

        # Extract text from the response.
        if hasattr(response, "content"):
            if isinstance(response.content, list):
                return self._join_text_blocks(response.content)
            if isinstance(response.content, str):
                return response.content

        return str(response)

    async def _agent_sdk_chat_with_tools(
        self,
        messages: List[Dict],
        tools: List[Dict[str, Any]],
        system: Optional[str],
        max_tokens: int
    ) -> "Message":
        """Internal async method for Agent SDK chat with tools (called via anyio bridge).

        NOTE: The new Claude Agent SDK (v0.1.36+) uses MCP servers for tools.
        For backward compatibility with the existing tool system, we fall back
        to the Direct API for tool calls. This means tool calls will consume
        API tokens even when Agent SDK mode is enabled. These calls are not
        recorded by the usage tracker, which is disabled in this mode.

        Uses Sonnet by default. Opus can be enabled via USE_OPUS_FOR_TOOLS=true
        for extremely intensive tasks (only recommended for Agent SDK
        flat-rate mode).

        Raises:
            ValueError: If no API key is configured.
        """
        if not self.api_key:
            raise ValueError(
                "ANTHROPIC_API_KEY required for tool calls in Agent SDK mode. "
                "Set the API key in .env or migrate tools to MCP servers."
            )

        # A client is created per call because self.client is None in this mode.
        temp_client = Anthropic(api_key=self.api_key)

        # Use Opus only if explicitly enabled; otherwise keep the instance model.
        if _USE_OPUS_FOR_TOOLS and self.mode == "agent_sdk":
            model = _DEFAULT_MODELS.get("claude_agent_sdk_opus", "claude-opus-4-6")
        else:
            model = self.model

        return temp_client.messages.create(
            model=model,
            max_tokens=max_tokens,
            system=system or "",
            messages=messages,
            tools=tools,
        )

    def _convert_sdk_response_to_message(self, sdk_response: Dict[str, Any]) -> "Message":
        """Convert Agent SDK response to anthropic.types.Message format.

        This ensures compatibility with agent.py's existing tool loop.

        Args:
            sdk_response: Response dict; the keys read are ``content``,
                ``usage``, ``model``, ``id`` and ``stop_reason``.

        Returns:
            A lightweight object exposing ``content``, ``stop_reason``,
            ``usage``, ``id``, ``model``, ``role`` and ``type``. It is not an
            actual ``anthropic.types.Message`` instance.
        """
        content_blocks = []
        raw_content = sdk_response.get("content", [])

        if isinstance(raw_content, str):
            # Simple text response.
            content_blocks = [TextBlock(type="text", text=raw_content)]
        elif isinstance(raw_content, list):
            # Only dict blocks of type "text" or "tool_use" are converted;
            # anything else is dropped.
            for block in raw_content:
                if not isinstance(block, dict):
                    continue
                block_type = block.get("type")
                if block_type == "text":
                    content_blocks.append(TextBlock(
                        type="text",
                        text=block.get("text", ""),
                    ))
                elif block_type == "tool_use":
                    content_blocks.append(ToolUseBlock(
                        type="tool_use",
                        id=block.get("id", ""),
                        name=block.get("name", ""),
                        input=block.get("input", {}),
                    ))

        usage_data = sdk_response.get("usage", {})
        usage = Usage(
            input_tokens=usage_data.get("input_tokens", 0),
            output_tokens=usage_data.get("output_tokens", 0),
        )

        # A plain stand-in object is returned rather than a real Message.
        class MessageLike:
            def __init__(self, content, stop_reason, usage, model, message_id):
                self.content = content
                self.stop_reason = stop_reason
                self.usage = usage
                self.id = message_id
                self.model = model
                self.role = "assistant"
                self.type = "message"

        return MessageLike(
            content=content_blocks,
            stop_reason=sdk_response.get("stop_reason", "end_turn"),
            usage=usage,
            model=sdk_response.get("model", self.model),
            message_id=sdk_response.get("id", "sdk_message"),
        )

    def chat_with_tools(
        self,
        messages: List[Dict],
        tools: List[Dict[str, Any]],
        system: Optional[str] = None,
        max_tokens: int = 16384,
        use_cache: bool = False,
    ) -> "Message":
        """Send chat request with tool support. Returns full Message object.

        Args:
            messages: Conversation in Anthropic Messages format.
            tools: Tool definitions forwarded unchanged.
            system: Optional system prompt.
            max_tokens: Upper bound on generated tokens.
            use_cache: Enable prompt caching for Sonnet models (saves 90% on
                repeated context). Only honoured in direct API mode.

        Returns:
            The API response in direct API and Agent SDK modes; a minimal
            Message-like object in legacy server mode.

        Raises:
            ValueError: If the provider is not Claude or the mode is unknown.
            Exception: Agent SDK and legacy server failures, wrapped with the
                original error attached as ``__cause__``.
        """
        if self.provider != "claude":
            raise ValueError("Tool use only supported for Claude provider")

        # Agent SDK mode (Pro subscription).
        if self.mode == "agent_sdk":
            try:
                # anyio.run creates the event loop for the async helper.
                return anyio.run(
                    self._agent_sdk_chat_with_tools,
                    messages,
                    tools,
                    system,
                    max_tokens,
                )
            except Exception as e:
                raise Exception(f"Agent SDK error: {e}") from e

        # Legacy Claude Code server (Pro subscription).
        if self.mode == "legacy_server":
            try:
                payload = {
                    "messages": messages,
                    "tools": tools,
                    "system": system,
                    "max_tokens": max_tokens,
                }
                response = requests.post(
                    f"{_CLAUDE_CODE_SERVER_URL}/v1/chat/tools",
                    json=payload,
                    timeout=120,
                )
                response.raise_for_status()
                data = response.json()

                # Minimal stand-in exposing the attributes read from a Message.
                class MockMessage:
                    def __init__(self, data):
                        # Tolerate a missing or null "usage" entry.
                        usage_data = data.get("usage") or {}
                        self.content = data.get("content", [])
                        self.stop_reason = data.get("stop_reason", "end_turn")
                        self.usage = type('obj', (object,), {
                            'input_tokens': usage_data.get("input_tokens", 0),
                            'output_tokens': usage_data.get("output_tokens", 0),
                        })

                return MockMessage(data)
            except Exception as e:
                raise Exception(f"Claude Code server error: {e}") from e

        # Direct API (pay-per-token).
        if self.mode == "direct_api":
            # Enable caching only for Sonnet models (not worth it for Haiku).
            enable_caching = use_cache and "sonnet" in self.model.lower()

            if enable_caching and system:
                # Cache control requires the system prompt in block-list form.
                system_blocks = [
                    {
                        "type": "text",
                        "text": system,
                        "cache_control": {"type": "ephemeral"},
                    }
                ]
            else:
                system_blocks = system or ""

            response = self.client.messages.create(
                model=self.model,
                max_tokens=max_tokens,
                system=system_blocks,
                messages=messages,
                tools=tools,
            )
            self._track_usage(response)
            return response

        # Fail loudly instead of implicitly returning None.
        raise ValueError(f"Unsupported mode: {self.mode}")

    def set_model(self, model: str) -> None:
        """Change the active model."""
        self.model = model

    def get_usage_stats(self, target_date: Optional[str] = None) -> Dict:
        """Get usage statistics and costs.

        Args:
            target_date: Date string (YYYY-MM-DD). If None, returns today's stats.

        Returns:
            Dict with cost, token counts, and breakdown by model, or
            ``{"error": "Usage tracking not enabled"}`` when no tracker exists.
        """
        if not self.tracker:
            return {"error": "Usage tracking not enabled"}

        return self.tracker.get_daily_cost(target_date)
|