Add API usage tracking and dynamic task reloading

Features:
- Usage tracking system (usage_tracker.py)
  - Tracks input/output tokens per API call
  - Calculates costs with support for cache pricing
  - Stores data in usage_data.json (gitignored)
  - Integrated into llm_interface.py

- Dynamic task scheduler reloading
  - Auto-detects YAML changes every 60s
  - No restart needed for new tasks
  - reload_tasks() method for manual refresh

- Example cost tracking scheduled task
  - Daily API usage report
  - Budget tracking ($5/month target)
  - Disabled by default in scheduled_tasks.yaml

Improvements:
- Fixed tool_use/tool_result pair splitting bug (CRITICAL)
- Added thread safety to agent.chat()
- Fixed N+1 query problem in hybrid search
- Optimized database batch queries
- Added conversation history pruning (50 messages max)

Updated .gitignore:
- Exclude user profiles (memory_workspace/users/*.md)
- Exclude usage data (usage_data.json)
- Exclude vector index (vectors.usearch)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-13 23:38:44 -07:00
parent ab3a5afd59
commit 8afff96bb5
16 changed files with 1096 additions and 244 deletions

121
agent.py
View File

@@ -1,5 +1,6 @@
"""AI Agent with Memory and LLM Integration."""
import threading
from typing import List, Optional
from heartbeat import Heartbeat
@@ -12,6 +13,8 @@ from tools import TOOL_DEFINITIONS, execute_tool
MAX_CONTEXT_MESSAGES = 3 # Reduced from 5 to save tokens
# Maximum characters of agent response to store in memory
MEMORY_RESPONSE_PREVIEW_LENGTH = 200
# Maximum conversation history entries before pruning
MAX_CONVERSATION_HISTORY = 50
class Agent:
@@ -27,6 +30,7 @@ class Agent:
self.llm = LLMInterface(provider)
self.hooks = HooksSystem()
self.conversation_history: List[dict] = []
self._chat_lock = threading.Lock()
self.memory.sync()
self.hooks.trigger("agent", "startup", {"workspace_dir": workspace_dir})
@@ -37,13 +41,88 @@ class Agent:
self.heartbeat.on_alert = self._on_heartbeat_alert
self.heartbeat.start()
def _get_context_messages(self, max_messages: int) -> List[dict]:
"""Get recent messages without breaking tool_use/tool_result pairs.
Ensures that:
1. A tool_result message always has its preceding tool_use message
2. A tool_use message always has its following tool_result message
3. The first message is never a tool_result without its tool_use
"""
if len(self.conversation_history) <= max_messages:
return list(self.conversation_history)
# Start with the most recent messages
start_idx = len(self.conversation_history) - max_messages
# Track original start_idx before adjustments for end-of-list check
original_start_idx = start_idx
# Check if we split a tool pair at the start
if start_idx > 0:
candidate = self.conversation_history[start_idx]
# If first message is a tool_result, include the tool_use before it
if candidate["role"] == "user" and isinstance(candidate.get("content"), list):
if any(isinstance(block, dict) and block.get("type") == "tool_result"
for block in candidate["content"]):
start_idx -= 1
# Build result slice using adjusted start
result = list(self.conversation_history[start_idx:])
# Check if we split a tool pair at the end
# Use original_start_idx + max_messages to find end of original slice
original_end_idx = original_start_idx + max_messages
if original_end_idx < len(self.conversation_history):
end_msg = self.conversation_history[original_end_idx - 1]
if end_msg["role"] == "assistant" and isinstance(end_msg.get("content"), list):
has_tool_use = any(
(hasattr(block, "type") and block.type == "tool_use") or
(isinstance(block, dict) and block.get("type") == "tool_use")
for block in end_msg["content"]
)
if has_tool_use:
# The tool_result at original_end_idx is already in result
# if start_idx was adjusted, so only add if it's not there
next_msg = self.conversation_history[original_end_idx]
if next_msg not in result:
result.append(next_msg)
return result
def _on_heartbeat_alert(self, message: str) -> None:
"""Handle heartbeat alerts."""
print(f"\nHeartbeat Alert:\n{message}\n")
def _prune_conversation_history(self) -> None:
    """Trim the conversation history so it cannot grow without bound.

    Once the history exceeds MAX_CONVERSATION_HISTORY entries, only the
    newest half of that limit is retained.  The cut point is nudged back
    by one message if it would otherwise orphan a tool_result from its
    tool_use.
    """
    history = self.conversation_history
    if len(history) <= MAX_CONVERSATION_HISTORY:
        return

    # Retain only the most recent half of the allowed window.
    cut = len(history) - MAX_CONVERSATION_HISTORY // 2

    # Walk the cut back one entry if it lands on a user message carrying
    # a tool_result block, so the preceding tool_use stays paired.
    if cut > 0:
        first = history[cut]
        content = first.get("content")
        if first["role"] == "user" and isinstance(content, list) and any(
            isinstance(block, dict) and block.get("type") == "tool_result"
            for block in content
        ):
            cut -= 1

    self.conversation_history = history[cut:]
def chat(self, user_message: str, username: str = "default") -> str:
"""Chat with context from memory and tool use."""
# Handle model switching commands
"""Chat with context from memory and tool use.
Thread-safe: uses a lock to prevent concurrent modification of
conversation history from multiple threads (e.g., scheduled tasks
and live messages).
"""
# Handle model switching commands (no lock needed, read-only on history)
if user_message.lower().startswith("/model "):
model_name = user_message[7:].strip()
self.llm.set_model(model_name)
@@ -66,9 +145,14 @@ class Agent:
f"Commands: /sonnet, /haiku, /status"
)
with self._chat_lock:
return self._chat_inner(user_message, username)
def _chat_inner(self, user_message: str, username: str) -> str:
"""Inner chat logic, called while holding _chat_lock."""
soul = self.memory.get_soul()
user_profile = self.memory.get_user(username)
relevant_memory = self.memory.search(user_message, max_results=2)
relevant_memory = self.memory.search_hybrid(user_message, max_results=2)
memory_lines = [f"- {mem['snippet']}" for mem in relevant_memory]
system = (
@@ -82,18 +166,29 @@ class Agent:
{"role": "user", "content": user_message}
)
# Prune history to prevent unbounded growth
self._prune_conversation_history()
# Tool execution loop
max_iterations = 5 # Reduced from 10 to save costs
# Enable caching for Sonnet to save 90% on repeated system prompts
use_caching = "sonnet" in self.llm.model.lower()
for iteration in range(max_iterations):
response = self.llm.chat_with_tools(
self.conversation_history[-MAX_CONTEXT_MESSAGES:],
tools=TOOL_DEFINITIONS,
system=system,
use_cache=use_caching,
)
# Get recent messages, ensuring we don't break tool_use/tool_result pairs
context_messages = self._get_context_messages(MAX_CONTEXT_MESSAGES)
try:
response = self.llm.chat_with_tools(
context_messages,
tools=TOOL_DEFINITIONS,
system=system,
use_cache=use_caching,
)
except Exception as e:
error_msg = f"LLM API error: {e}"
print(f"[Agent] {error_msg}")
return f"Sorry, I encountered an error communicating with the AI model. Please try again."
# Check stop reason
if response.stop_reason == "end_turn":
@@ -104,6 +199,11 @@ class Agent:
text_content.append(block.text)
final_response = "\n".join(text_content)
# Handle empty response
if not final_response.strip():
final_response = "(No response generated)"
self.conversation_history.append(
{"role": "assistant", "content": final_response}
)
@@ -146,6 +246,9 @@ class Agent:
tool_results = []
for tool_use in tool_uses:
result = execute_tool(tool_use.name, tool_use.input)
# Truncate large tool outputs to prevent token explosion
if len(result) > 5000:
result = result[:5000] + "\n... (output truncated)"
print(f"[Tool] {tool_use.name}: {result[:100]}...")
tool_results.append({
"type": "tool_result",