|
|
|
"""LLM Interface - Claude API, GLM, and other models."""
|
|
|
|
|
|
|
|
|
|
import os
|
|
|
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
from anthropic import Anthropic
|
|
|
|
|
from anthropic.types import Message
|
|
|
|
|
|
|
|
|
from usage_tracker import UsageTracker
|
|
|
|
|
|
|
|
|
# API key environment variable names by provider; __init__ falls back to
# these when no explicit api_key is passed.
_API_KEY_ENV_VARS = {
    "claude": "ANTHROPIC_API_KEY",
    "glm": "GLM_API_KEY",
}

# Default models by provider; overridable at runtime via set_model().
_DEFAULT_MODELS = {
    "claude": "claude-haiku-4-5-20251001",  # 12x cheaper than Sonnet!
    "glm": "glm-4-plus",
}

# ZhipuAI chat-completions endpoint used by the GLM branch of chat().
_GLM_BASE_URL = "https://open.bigmodel.cn/api/paas/v4/chat/completions"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class LLMInterface:
    """Simple LLM interface supporting Claude and GLM.

    Claude requests go through the official ``anthropic`` SDK client; GLM
    requests are plain HTTP POSTs to the ZhipuAI endpoint. Token usage can
    optionally be recorded via ``UsageTracker``.
    """

    def __init__(
        self,
        provider: str = "claude",
        api_key: Optional[str] = None,
        track_usage: bool = True,
    ) -> None:
        """Initialize the interface.

        Args:
            provider: ``"claude"`` or ``"glm"``.
            api_key: Explicit API key. Falls back to the provider's
                conventional environment variable (see _API_KEY_ENV_VARS).
            track_usage: If True, record token usage via UsageTracker
                (queried through get_usage_stats).
        """
        self.provider = provider
        # Fall back to the provider's environment variable when no key given.
        self.api_key = api_key or os.getenv(
            _API_KEY_ENV_VARS.get(provider, ""),
        )
        self.model = _DEFAULT_MODELS.get(provider, "")
        self.client: Optional[Anthropic] = None

        # Usage tracking
        self.tracker = UsageTracker() if track_usage else None

        # Only Claude keeps a persistent SDK client; GLM is called over HTTP.
        if provider == "claude":
            self.client = Anthropic(api_key=self.api_key)

    def _track_response_usage(self, response: Any) -> None:
        """Record token usage from an Anthropic response, if tracking is on.

        Shared by chat() and chat_with_tools() so the accounting stays
        consistent. The cache token fields are optional on the usage
        object, so they default to 0 when absent.
        """
        if not self.tracker or not hasattr(response, "usage"):
            return
        usage = response.usage
        self.tracker.track(
            model=self.model,
            input_tokens=usage.input_tokens,
            output_tokens=usage.output_tokens,
            cache_creation_tokens=getattr(
                usage, "cache_creation_input_tokens", 0
            ),
            cache_read_tokens=getattr(
                usage, "cache_read_input_tokens", 0
            ),
        )

    def chat(
        self,
        messages: List[Dict],
        system: Optional[str] = None,
        max_tokens: int = 4096,
    ) -> str:
        """Send chat request and get response.

        Args:
            messages: Conversation messages in provider wire format.
            system: Optional system prompt (sent as "" when None).
            max_tokens: Maximum tokens to generate.

        Returns:
            The assistant's reply text; "" if Claude returns no content.

        Raises:
            Exception: If the API call fails or returns an unexpected response.
            ValueError: If the provider is unsupported.
        """
        if self.provider == "claude":
            response = self.client.messages.create(
                model=self.model,
                max_tokens=max_tokens,
                system=system or "",
                messages=messages,
            )

            # Track usage
            self._track_response_usage(response)

            if not response.content:
                return ""
            return response.content[0].text

        if self.provider == "glm":
            # GLM expects the system prompt as the first chat message.
            payload = {
                "model": self.model,
                "messages": [
                    {"role": "system", "content": system or ""},
                ] + messages,
                "max_tokens": max_tokens,
            }
            headers = {"Authorization": f"Bearer {self.api_key}"}
            response = requests.post(
                _GLM_BASE_URL, json=payload, headers=headers,
                timeout=60,
            )
            response.raise_for_status()
            return response.json()["choices"][0]["message"]["content"]

        raise ValueError(f"Unsupported provider: {self.provider}")

    def chat_with_tools(
        self,
        messages: List[Dict],
        tools: List[Dict[str, Any]],
        system: Optional[str] = None,
        max_tokens: int = 4096,
        use_cache: bool = False,
    ) -> Message:
        """Send chat request with tool support. Returns full Message object.

        Args:
            messages: Conversation messages in Anthropic wire format.
            tools: Tool definitions passed through to the API.
            system: Optional system prompt.
            max_tokens: Maximum tokens to generate.
            use_cache: Enable prompt caching for Sonnet models (saves 90% on repeated context)

        Raises:
            ValueError: If the provider is not Claude.
        """
        if self.provider != "claude":
            raise ValueError("Tool use only supported for Claude provider")

        # Enable caching only for Sonnet models (not worth it for Haiku)
        enable_caching = use_cache and "sonnet" in self.model.lower()

        # Structure system prompt for optimal caching
        if enable_caching and system:
            # Convert string to list format with cache control
            system_blocks = [
                {
                    "type": "text",
                    "text": system,
                    "cache_control": {"type": "ephemeral"}
                }
            ]
        else:
            system_blocks = system or ""

        response = self.client.messages.create(
            model=self.model,
            max_tokens=max_tokens,
            system=system_blocks,
            messages=messages,
            tools=tools,
        )

        # Track usage
        self._track_response_usage(response)

        return response

    def set_model(self, model: str) -> None:
        """Change the active model (no validation against known models)."""
        self.model = model

    def get_usage_stats(self, target_date: Optional[str] = None) -> Dict:
        """Get usage statistics and costs.

        Args:
            target_date: Date string (YYYY-MM-DD). If None, returns today's stats.

        Returns:
            Dict with cost, token counts, and breakdown by model; an error
            dict when usage tracking is disabled.
        """
        if not self.tracker:
            return {"error": "Usage tracking not enabled"}

        return self.tracker.get_daily_cost(target_date)
|