Features: - Usage tracking system (usage_tracker.py) - Tracks input/output tokens per API call - Calculates costs with support for cache pricing - Stores data in usage_data.json (gitignored) - Integrated into llm_interface.py - Dynamic task scheduler reloading - Auto-detects YAML changes every 60s - No restart needed for new tasks - reload_tasks() method for manual refresh - Example cost tracking scheduled task - Daily API usage report - Budget tracking ($5/month target) - Disabled by default in scheduled_tasks.yaml Improvements: - Fixed tool_use/tool_result pair splitting bug (CRITICAL) - Added thread safety to agent.chat() - Fixed N+1 query problem in hybrid search - Optimized database batch queries - Added conversation history pruning (50 messages max) Updated .gitignore: - Exclude user profiles (memory_workspace/users/*.md) - Exclude usage data (usage_data.json) - Exclude vector index (vectors.usearch) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
207 lines
6.5 KiB
Python
207 lines
6.5 KiB
Python
"""Track LLM API usage and costs."""
|
|
|
|
import json
|
|
from datetime import datetime, date
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional
|
|
|
|
|
|
# Pricing in USD per 1M tokens (as of 2026-02-13).
#
# "input" and "output" are required for every model. "cache_write" and
# "cache_read" are optional: calculate_cost() only charges cache tokens for
# models whose entry defines the corresponding rate.
_PRICING = {
    "claude-haiku-4-5-20251001": {
        # Haiku 4.5 list price. The former 0.25 / 1.25 values were the
        # Claude 3 Haiku rates and under-reported spend by a factor of four.
        "input": 1.00,
        "output": 5.00,
        "cache_write": 1.25,  # Cache creation (1.25x input)
        "cache_read": 0.10,  # 90% discount on cache hits
    },
    "claude-sonnet-4-5-20250929": {
        "input": 3.00,
        "output": 15.00,
        "cache_write": 3.75,  # Cache creation (1.25x input)
        "cache_read": 0.30,  # 90% discount on cache hits
    },
    "claude-opus-4-6": {
        # NOTE(review): these figures equal the older Opus 4.1 list price;
        # confirm them against the current pricing page before relying on
        # Opus cost reports.
        "input": 15.00,
        "output": 75.00,
        "cache_write": 18.75,
        "cache_read": 1.50,
    },
}
|
|
|
|
|
|
class UsageTracker:
    """Record LLM API token usage in a JSON file and derive USD costs.

    Each tracked call becomes one dict in ``usage_data``; the full list is
    rewritten to ``storage_file`` after every call to ``track``. Costs are
    computed on demand from the module-level ``_PRICING`` table.
    """

    # Token counters stored on every entry and summed by the usage reports.
    _TOKEN_FIELDS = (
        "input_tokens",
        "output_tokens",
        "cache_creation_tokens",
        "cache_read_tokens",
    )

    def __init__(self, storage_file: str = "usage_data.json") -> None:
        """Create a tracker backed by *storage_file* and load existing entries.

        Args:
            storage_file: Path of the JSON file that persists the entries.

        Raises:
            ValueError: If the file exists but is not a JSON list.
        """
        self.storage_file = Path(storage_file)
        self.usage_data: List[Dict] = []
        self._load()

    def _load(self) -> None:
        """Load previously recorded entries from ``storage_file``, if present.

        A missing file leaves ``usage_data`` untouched.

        Raises:
            ValueError: If the file is not valid JSON (``json.JSONDecodeError``
                is a ``ValueError`` subclass) or its top-level value is not a
                list. Failing here is clearer than a later ``AttributeError``
                from ``append`` or a silently wrong report.
        """
        if not self.storage_file.exists():
            return

        with open(self.storage_file, encoding="utf-8") as f:
            loaded = json.load(f)

        if not isinstance(loaded, list):
            raise ValueError(
                f"{self.storage_file} must contain a JSON list of usage "
                f"entries, got {type(loaded).__name__}"
            )
        self.usage_data = loaded

    def _save(self) -> None:
        """Overwrite ``storage_file`` with the current ``usage_data``."""
        with open(self.storage_file, "w", encoding="utf-8") as f:
            json.dump(self.usage_data, f, indent=2)

    def track(
        self,
        model: str,
        input_tokens: int,
        output_tokens: int,
        cache_creation_tokens: Optional[int] = 0,
        cache_read_tokens: Optional[int] = 0,
    ) -> None:
        """Record one API call's token usage and persist it immediately.

        Args:
            model: Model name the call was made against.
            input_tokens: Number of input tokens.
            output_tokens: Number of output tokens.
            cache_creation_tokens: Tokens written to the prompt cache.
            cache_read_tokens: Tokens read from the prompt cache.

        ``None`` counters are stored as 0 so that later aggregation never has
        to add ``None`` to an int.
        """
        # Read the clock once: separate now()/today() calls could straddle
        # midnight and store a timestamp and a date that disagree.
        now = datetime.now()
        entry = {
            "timestamp": now.isoformat(),
            "date": now.date().isoformat(),
            "model": model,
            "input_tokens": input_tokens or 0,
            "output_tokens": output_tokens or 0,
            "cache_creation_tokens": cache_creation_tokens or 0,
            "cache_read_tokens": cache_read_tokens or 0,
        }
        self.usage_data.append(entry)
        self._save()

    def get_daily_usage(
        self, target_date: Optional[str] = None
    ) -> Dict[str, int]:
        """Get total token usage for a specific date.

        Args:
            target_date: Date string (YYYY-MM-DD). Defaults to today.

        Returns:
            Dict with the summed ``input_tokens``, ``output_tokens``,
            ``cache_creation_tokens`` and ``cache_read_tokens`` of every
            entry whose ``date`` equals *target_date*. Missing or ``None``
            counters count as 0.
        """
        if target_date is None:
            target_date = str(date.today())

        totals = dict.fromkeys(self._TOKEN_FIELDS, 0)
        for entry in self.usage_data:
            if entry.get("date") != target_date:
                continue
            for field in self._TOKEN_FIELDS:
                # `or 0` also covers counters persisted as JSON null.
                totals[field] += entry.get(field) or 0
        return totals

    def calculate_cost(
        self,
        model: str,
        input_tokens: int,
        output_tokens: int,
        cache_creation_tokens: Optional[int] = 0,
        cache_read_tokens: Optional[int] = 0,
    ) -> float:
        """Calculate cost in USD for token usage.

        Args:
            model: Model name (e.g., "claude-haiku-4-5-20251001").
            input_tokens: Number of input tokens.
            output_tokens: Number of output tokens.
            cache_creation_tokens: Tokens written to cache. Charged only when
                the model's pricing entry defines a "cache_write" rate.
            cache_read_tokens: Tokens read from cache. Charged only when the
                model's pricing entry defines a "cache_read" rate.

        Returns:
            Total cost in USD (unrounded).
        """
        # Unknown models fall back to the Haiku rates. Haiku is the cheapest
        # row in the table, so this is a lower bound rather than a
        # conservative estimate: costs for unlisted models may be understated.
        rates = _PRICING.get(model) or _PRICING["claude-haiku-4-5-20251001"]

        # Base input/output costs (rates are per 1M tokens).
        cost = ((input_tokens or 0) / 1_000_000) * rates["input"]
        cost += ((output_tokens or 0) / 1_000_000) * rates["output"]

        # Cache costs, only for models that define the matching rate.
        if cache_creation_tokens and "cache_write" in rates:
            cost += (cache_creation_tokens / 1_000_000) * rates["cache_write"]
        if cache_read_tokens and "cache_read" in rates:
            cost += (cache_read_tokens / 1_000_000) * rates["cache_read"]

        return cost

    def _summarize_costs(self, entries: List[Dict]) -> Dict:
        """Sum the cost of *entries* overall and per model, rounded to 4 places.

        Entries without a model name are grouped under "unknown" (and priced
        with the fallback rates) instead of aborting the whole report.
        """
        total_cost = 0.0
        per_model: Dict[str, float] = {}

        for entry in entries:
            model = entry.get("model") or "unknown"
            cost = self.calculate_cost(
                model=model,
                input_tokens=entry.get("input_tokens", 0),
                output_tokens=entry.get("output_tokens", 0),
                cache_creation_tokens=entry.get("cache_creation_tokens", 0),
                cache_read_tokens=entry.get("cache_read_tokens", 0),
            )
            total_cost += cost
            per_model[model] = per_model.get(model, 0.0) + cost

        return {
            "total_cost": round(total_cost, 4),
            "model_breakdown": {k: round(v, 4) for k, v in per_model.items()},
        }

    def get_daily_cost(self, target_date: Optional[str] = None) -> Dict:
        """Get total cost and breakdown for a specific date.

        Args:
            target_date: Date string (YYYY-MM-DD). Defaults to today.

        Returns:
            Dict with ``date``, ``total_cost``, ``model_breakdown`` (cost per
            model) and ``token_totals`` (see ``get_daily_usage``). Costs are
            rounded to 4 decimal places.
        """
        if target_date is None:
            target_date = str(date.today())

        day_entries = [
            entry
            for entry in self.usage_data
            if entry.get("date") == target_date
        ]
        summary = self._summarize_costs(day_entries)

        return {
            "date": target_date,
            "total_cost": summary["total_cost"],
            "model_breakdown": summary["model_breakdown"],
            "token_totals": self.get_daily_usage(target_date),
        }

    def get_total_cost(self) -> Dict:
        """Get lifetime total cost and stats.

        Returns:
            Dict with ``total_cost``, ``total_calls`` (number of recorded
            entries) and ``model_breakdown`` (cost per model). Costs are
            rounded to 4 decimal places.
        """
        summary = self._summarize_costs(self.usage_data)
        return {
            "total_cost": summary["total_cost"],
            "total_calls": len(self.usage_data),
            "model_breakdown": summary["model_breakdown"],
        }
|