"""LLM Interface - Claude API, GLM, and other models."""

import os
from typing import Any, Dict, List, Optional

import requests
from anthropic import Anthropic
from anthropic.types import Message

from usage_tracker import UsageTracker

# API key environment variable names by provider
_API_KEY_ENV_VARS = {
    "claude": "ANTHROPIC_API_KEY",
    "glm": "GLM_API_KEY",
}

# Default models by provider
_DEFAULT_MODELS = {
    "claude": "claude-haiku-4-5-20251001",  # 12x cheaper than Sonnet!
    "glm": "glm-4-plus",
}

_GLM_BASE_URL = "https://open.bigmodel.cn/api/paas/v4/chat/completions"


class LLMInterface:
    """Simple LLM interface supporting Claude and GLM.

    Attributes:
        provider: Backend name ("claude" or "glm").
        api_key: API key, taken from the argument or the provider's env var.
        model: Active model id (changeable via set_model()).
        client: Anthropic SDK client (only set for the "claude" provider).
        tracker: UsageTracker instance, or None when tracking is disabled.
    """

    def __init__(
        self,
        provider: str = "claude",
        api_key: Optional[str] = None,
        track_usage: bool = True,
    ) -> None:
        """Initialize the interface.

        Args:
            provider: Which backend to use ("claude" or "glm").
            api_key: Explicit API key; falls back to the provider's
                environment variable when omitted.
            track_usage: Whether to record token usage via UsageTracker.
        """
        self.provider = provider
        # Fall back to the provider-specific env var; unknown providers
        # look up the empty string, which yields None.
        self.api_key = api_key or os.getenv(
            _API_KEY_ENV_VARS.get(provider, ""),
        )
        self.model = _DEFAULT_MODELS.get(provider, "")
        self.client: Optional[Anthropic] = None

        # Usage tracking
        self.tracker = UsageTracker() if track_usage else None

        if provider == "claude":
            self.client = Anthropic(api_key=self.api_key)

    def _track_usage(self, response: Any) -> None:
        """Record token usage from an Anthropic response, if tracking is on.

        Shared by chat() and chat_with_tools() to avoid duplicating the
        bookkeeping. Cache token fields are optional on the usage object,
        so they default to 0 via getattr.
        """
        if self.tracker and hasattr(response, "usage"):
            self.tracker.track(
                model=self.model,
                input_tokens=response.usage.input_tokens,
                output_tokens=response.usage.output_tokens,
                cache_creation_tokens=getattr(
                    response.usage, "cache_creation_input_tokens", 0
                ),
                cache_read_tokens=getattr(
                    response.usage, "cache_read_input_tokens", 0
                ),
            )

    def chat(
        self,
        messages: List[Dict],
        system: Optional[str] = None,
        max_tokens: int = 4096,
    ) -> str:
        """Send chat request and get response.

        Args:
            messages: Conversation messages in provider chat format.
            system: Optional system prompt.
            max_tokens: Maximum tokens to generate.

        Returns:
            The assistant's text reply ("" if the response had no text).

        Raises:
            ValueError: If the provider is not supported.
            Exception: If the API call fails or returns an unexpected
                response.
        """
        if self.provider == "claude":
            response = self.client.messages.create(
                model=self.model,
                max_tokens=max_tokens,
                system=system or "",
                messages=messages,
            )

            self._track_usage(response)

            # Concatenate all text blocks rather than assuming content[0]
            # is text — the first block can be a tool_use block, which has
            # no .text attribute.
            return "".join(
                block.text
                for block in response.content
                if hasattr(block, "text")
            )

        if self.provider == "glm":
            # Only send a system message when one was actually provided;
            # an empty system message is wasteful and some endpoints
            # reject it.
            glm_messages: List[Dict] = []
            if system:
                glm_messages.append({"role": "system", "content": system})
            glm_messages.extend(messages)

            payload = {
                "model": self.model,
                "messages": glm_messages,
                "max_tokens": max_tokens,
            }
            headers = {"Authorization": f"Bearer {self.api_key}"}
            response = requests.post(
                _GLM_BASE_URL,
                json=payload,
                headers=headers,
                timeout=60,
            )
            response.raise_for_status()
            return response.json()["choices"][0]["message"]["content"]

        raise ValueError(f"Unsupported provider: {self.provider}")

    def chat_with_tools(
        self,
        messages: List[Dict],
        tools: List[Dict[str, Any]],
        system: Optional[str] = None,
        max_tokens: int = 4096,
        use_cache: bool = False,
    ) -> Message:
        """Send chat request with tool support. Returns full Message object.

        Args:
            messages: Conversation messages in Anthropic chat format.
            tools: Tool definitions per the Anthropic tools schema.
            system: Optional system prompt.
            max_tokens: Maximum tokens to generate.
            use_cache: Enable prompt caching for Sonnet models
                (saves 90% on repeated context).

        Returns:
            The full anthropic Message (callers inspect content blocks
            for tool_use themselves).

        Raises:
            ValueError: If the provider is not "claude".
        """
        if self.provider != "claude":
            raise ValueError("Tool use only supported for Claude provider")

        # Enable caching only for Sonnet models (not worth it for Haiku)
        enable_caching = use_cache and "sonnet" in self.model.lower()

        # Structure system prompt for optimal caching
        if enable_caching and system:
            # Convert string to list format with cache control so the
            # system prompt becomes an ephemeral cache breakpoint.
            system_blocks: Any = [
                {
                    "type": "text",
                    "text": system,
                    "cache_control": {"type": "ephemeral"},
                }
            ]
        else:
            system_blocks = system or ""

        response = self.client.messages.create(
            model=self.model,
            max_tokens=max_tokens,
            system=system_blocks,
            messages=messages,
            tools=tools,
        )

        self._track_usage(response)

        return response

    def set_model(self, model: str) -> None:
        """Change the active model."""
        self.model = model

    def get_usage_stats(self, target_date: Optional[str] = None) -> Dict:
        """Get usage statistics and costs.

        Args:
            target_date: Date string (YYYY-MM-DD). If None, returns
                today's stats.

        Returns:
            Dict with cost, token counts, and breakdown by model.
        """
        if not self.tracker:
            return {"error": "Usage tracking not enabled"}
        return self.tracker.get_daily_cost(target_date)