|
|
|
"""LLM Interface - Claude API, GLM, and other models."""
|
|
|
|
|
|
|
|
|
|
import os
|
|
|
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
from anthropic import Anthropic
|
|
|
|
|
from anthropic.types import Message
|
|
|
|
|
|
|
|
|
from usage_tracker import UsageTracker
|
|
|
|
|
|
|
|
|
# API key environment variable names by provider; __init__ falls back to
# these when no explicit api_key is passed.
_API_KEY_ENV_VARS = {
    "claude": "ANTHROPIC_API_KEY",
    "glm": "GLM_API_KEY",
}

# Default models by provider; overridable at runtime via set_model().
_DEFAULT_MODELS = {
    "claude": "claude-haiku-4-5-20251001",  # 12x cheaper than Sonnet!
    "glm": "glm-4-plus",
}

# ZhipuAI chat-completions endpoint used by the GLM branch of chat().
_GLM_BASE_URL = "https://open.bigmodel.cn/api/paas/v4/chat/completions"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class LLMInterface:
    """Simple LLM interface supporting Claude and GLM.

    Claude requests go through the official ``anthropic`` SDK client; GLM
    requests are plain HTTP POSTs to the ZhipuAI endpoint. Token usage can
    optionally be recorded via ``UsageTracker``.
    """

    def __init__(
        self,
        provider: str = "claude",
        api_key: Optional[str] = None,
        track_usage: bool = True,
    ) -> None:
        """Initialize the interface.

        Args:
            provider: ``"claude"`` or ``"glm"``.
            api_key: Explicit API key. Falls back to the provider's
                conventional environment variable (see _API_KEY_ENV_VARS).
            track_usage: If True, record token usage via UsageTracker
                (queried through get_usage_stats).
        """
        self.provider = provider
        # Fall back to the provider's environment variable when no key given.
        self.api_key = api_key or os.getenv(
            _API_KEY_ENV_VARS.get(provider, ""),
        )
        self.model = _DEFAULT_MODELS.get(provider, "")
        self.client: Optional[Anthropic] = None

        # Usage tracking
        self.tracker = UsageTracker() if track_usage else None

        # Only Claude keeps a persistent SDK client; GLM is called over HTTP.
        if provider == "claude":
            self.client = Anthropic(api_key=self.api_key)

    def _track_response_usage(self, response: Any) -> None:
        """Record token usage from an Anthropic response, if tracking is on.

        Shared by chat() and chat_with_tools() so the accounting stays
        consistent. The cache token fields are optional on the usage
        object, so they default to 0 when absent.
        """
        if not self.tracker or not hasattr(response, "usage"):
            return
        usage = response.usage
        self.tracker.track(
            model=self.model,
            input_tokens=usage.input_tokens,
            output_tokens=usage.output_tokens,
            cache_creation_tokens=getattr(
                usage, "cache_creation_input_tokens", 0
            ),
            cache_read_tokens=getattr(
                usage, "cache_read_input_tokens", 0
            ),
        )

    def chat(
        self,
        messages: List[Dict],
        system: Optional[str] = None,
        max_tokens: int = 4096,
    ) -> str:
        """Send chat request and get response.

        Args:
            messages: Conversation messages in provider wire format.
            system: Optional system prompt (sent as "" when None).
            max_tokens: Maximum tokens to generate.

        Returns:
            The assistant's reply text; "" if Claude returns no content.

        Raises:
            Exception: If the API call fails or returns an unexpected response.
            ValueError: If the provider is unsupported.
        """
        if self.provider == "claude":
            response = self.client.messages.create(
                model=self.model,
                max_tokens=max_tokens,
                system=system or "",
                messages=messages,
            )

            # Track usage
            self._track_response_usage(response)

            if not response.content:
                return ""
            return response.content[0].text

        if self.provider == "glm":
            # GLM expects the system prompt as the first chat message.
            payload = {
                "model": self.model,
                "messages": [
                    {"role": "system", "content": system or ""},
                ] + messages,
                "max_tokens": max_tokens,
            }
            headers = {"Authorization": f"Bearer {self.api_key}"}
            response = requests.post(
                _GLM_BASE_URL, json=payload, headers=headers,
                timeout=60,
            )
            response.raise_for_status()
            return response.json()["choices"][0]["message"]["content"]

        raise ValueError(f"Unsupported provider: {self.provider}")

    def chat_with_tools(
        self,
        messages: List[Dict],
        tools: List[Dict[str, Any]],
        system: Optional[str] = None,
        max_tokens: int = 4096,
        use_cache: bool = False,
    ) -> Message:
        """Send chat request with tool support. Returns full Message object.

        Args:
            messages: Conversation messages in Anthropic wire format.
            tools: Tool definitions passed through to the API.
            system: Optional system prompt.
            max_tokens: Maximum tokens to generate.
            use_cache: Enable prompt caching for Sonnet models (saves 90% on repeated context)

        Raises:
            ValueError: If the provider is not Claude.
        """
        if self.provider != "claude":
            raise ValueError("Tool use only supported for Claude provider")

        # Enable caching only for Sonnet models (not worth it for Haiku)
        enable_caching = use_cache and "sonnet" in self.model.lower()

        # Structure system prompt for optimal caching
        if enable_caching and system:
            # Convert string to list format with cache control
            system_blocks = [
                {
                    "type": "text",
                    "text": system,
                    "cache_control": {"type": "ephemeral"}
                }
            ]
        else:
            system_blocks = system or ""

        response = self.client.messages.create(
            model=self.model,
            max_tokens=max_tokens,
            system=system_blocks,
            messages=messages,
            tools=tools,
        )

        # Track usage
        self._track_response_usage(response)

        return response

    def set_model(self, model: str) -> None:
        """Change the active model (no validation against known models)."""
        self.model = model

    def get_usage_stats(self, target_date: Optional[str] = None) -> Dict:
        """Get usage statistics and costs.

        Args:
            target_date: Date string (YYYY-MM-DD). If None, returns today's stats.

        Returns:
            Dict with cost, token counts, and breakdown by model; an error
            dict when usage tracking is disabled.
        """
        if not self.tracker:
            return {"error": "Usage tracking not enabled"}

        return self.tracker.get_daily_cost(target_date)
|