Files
ajarbot/llm_interface.py
Jordan Ramos 8afff96bb5 Add API usage tracking and dynamic task reloading
Features:
- Usage tracking system (usage_tracker.py)
  - Tracks input/output tokens per API call
  - Calculates costs with support for cache pricing
  - Stores data in usage_data.json (gitignored)
  - Integrated into llm_interface.py

- Dynamic task scheduler reloading
  - Auto-detects YAML changes every 60s
  - No restart needed for new tasks
  - reload_tasks() method for manual refresh

- Example cost tracking scheduled task
  - Daily API usage report
  - Budget tracking ($5/month target)
  - Disabled by default in scheduled_tasks.yaml

Improvements:
- Fixed tool_use/tool_result pair splitting bug (CRITICAL)
- Added thread safety to agent.chat()
- Fixed N+1 query problem in hybrid search
- Optimized database batch queries
- Added conversation history pruning (50 messages max)

Updated .gitignore:
- Exclude user profiles (memory_workspace/users/*.md)
- Exclude usage data (usage_data.json)
- Exclude vector index (vectors.usearch)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-13 23:38:44 -07:00

177 lines
5.5 KiB
Python

"""LLM Interface - Claude API, GLM, and other models."""
import os
from typing import Any, Dict, List, Optional
import requests
from anthropic import Anthropic
from anthropic.types import Message
from usage_tracker import UsageTracker
# API key environment variable names by provider.
# Unknown providers map to "" so os.getenv falls through to None.
_API_KEY_ENV_VARS = {
    "claude": "ANTHROPIC_API_KEY",
    "glm": "GLM_API_KEY",
}
# Default models by provider
_DEFAULT_MODELS = {
    "claude": "claude-haiku-4-5-20251001", # 12x cheaper than Sonnet!
    "glm": "glm-4-plus",
}
# GLM exposes an OpenAI-style chat-completions HTTP endpoint.
_GLM_BASE_URL = "https://open.bigmodel.cn/api/paas/v4/chat/completions"
class LLMInterface:
    """Simple LLM interface supporting Claude and GLM.

    Wraps the Anthropic SDK (Claude) and the GLM HTTP API behind a common
    chat() entry point, with optional per-call token/cost tracking via
    UsageTracker (Claude calls only — GLM responses are not tracked).
    """

    def __init__(
        self,
        provider: str = "claude",
        api_key: Optional[str] = None,
        track_usage: bool = True,
    ) -> None:
        """Initialize the interface for one provider.

        Args:
            provider: "claude" or "glm". An unknown provider is not
                rejected here; chat() raises ValueError when it is used.
            api_key: Explicit API key. Falls back to the provider's
                environment variable (e.g. ANTHROPIC_API_KEY).
            track_usage: When True, record token counts and costs for
                every Claude call in a UsageTracker.
        """
        self.provider = provider
        # Fall back to the provider-specific env var when no key is given.
        self.api_key = api_key or os.getenv(
            _API_KEY_ENV_VARS.get(provider, ""),
        )
        self.model = _DEFAULT_MODELS.get(provider, "")
        self.client: Optional[Anthropic] = None
        # Usage tracking
        self.tracker = UsageTracker() if track_usage else None
        if provider == "claude":
            self.client = Anthropic(api_key=self.api_key)

    def _track_usage(self, response: Any) -> None:
        """Record token usage from a Claude response, if tracking is enabled.

        Shared by chat() and chat_with_tools() so the accounting logic
        lives in one place. The cache token fields are read via getattr
        because they are absent on responses from non-caching requests.
        """
        if self.tracker and hasattr(response, "usage"):
            self.tracker.track(
                model=self.model,
                input_tokens=response.usage.input_tokens,
                output_tokens=response.usage.output_tokens,
                cache_creation_tokens=getattr(
                    response.usage, "cache_creation_input_tokens", 0
                ),
                cache_read_tokens=getattr(
                    response.usage, "cache_read_input_tokens", 0
                ),
            )

    def chat(
        self,
        messages: List[Dict],
        system: Optional[str] = None,
        max_tokens: int = 4096,
    ) -> str:
        """Send chat request and get response.

        Args:
            messages: Chat messages in the provider's expected format.
            system: Optional system prompt (sent as "" when omitted).
            max_tokens: Maximum number of tokens to generate.

        Returns:
            The assistant's reply text ("" when Claude returns no content).

        Raises:
            ValueError: If the configured provider is unsupported.
            Exception: If the API call fails or returns an unexpected response.
        """
        if self.provider == "claude":
            response = self.client.messages.create(
                model=self.model,
                max_tokens=max_tokens,
                system=system or "",
                messages=messages,
            )
            self._track_usage(response)
            if not response.content:
                return ""
            # NOTE(review): assumes the first content block is a text block;
            # a tool_use-first response would raise AttributeError here.
            return response.content[0].text
        if self.provider == "glm":
            # GLM uses an OpenAI-style API: the system prompt is prepended
            # as the first message rather than passed as a separate field.
            payload = {
                "model": self.model,
                "messages": [
                    {"role": "system", "content": system or ""},
                ] + messages,
                "max_tokens": max_tokens,
            }
            headers = {"Authorization": f"Bearer {self.api_key}"}
            response = requests.post(
                _GLM_BASE_URL, json=payload, headers=headers,
                timeout=60,
            )
            response.raise_for_status()
            return response.json()["choices"][0]["message"]["content"]
        raise ValueError(f"Unsupported provider: {self.provider}")

    def chat_with_tools(
        self,
        messages: List[Dict],
        tools: List[Dict[str, Any]],
        system: Optional[str] = None,
        max_tokens: int = 4096,
        use_cache: bool = False,
    ) -> Message:
        """Send chat request with tool support. Returns full Message object.

        Args:
            messages: Chat messages in Anthropic format.
            tools: Tool definitions passed through to the API.
            system: Optional system prompt.
            max_tokens: Maximum number of tokens to generate.
            use_cache: Enable prompt caching for Sonnet models (saves 90% on repeated context)

        Raises:
            ValueError: If the configured provider is not Claude.
        """
        if self.provider != "claude":
            raise ValueError("Tool use only supported for Claude provider")
        # Enable caching only for Sonnet models (not worth it for Haiku)
        enable_caching = use_cache and "sonnet" in self.model.lower()
        # Structure system prompt for optimal caching
        if enable_caching and system:
            # List form with cache_control marks the system prompt as a
            # cacheable prefix for Anthropic prompt caching.
            system_blocks: Any = [
                {
                    "type": "text",
                    "text": system,
                    "cache_control": {"type": "ephemeral"}
                }
            ]
        else:
            system_blocks = system or ""
        response = self.client.messages.create(
            model=self.model,
            max_tokens=max_tokens,
            system=system_blocks,
            messages=messages,
            tools=tools,
        )
        self._track_usage(response)
        return response

    def set_model(self, model: str) -> None:
        """Change the active model used for all subsequent calls."""
        self.model = model

    def get_usage_stats(self, target_date: Optional[str] = None) -> Dict:
        """Get usage statistics and costs.

        Args:
            target_date: Date string (YYYY-MM-DD). If None, returns today's stats.

        Returns:
            Dict with cost, token counts, and breakdown by model, or an
            error dict when tracking is disabled.
        """
        if not self.tracker:
            return {"error": "Usage tracking not enabled"}
        return self.tracker.get_daily_cost(target_date)