Add API usage tracking and dynamic task reloading
Features:
- Usage tracking system (usage_tracker.py)
  - Tracks input/output tokens per API call
  - Calculates costs with support for cache pricing
  - Stores data in usage_data.json (gitignored)
  - Integrated into llm_interface.py
- Dynamic task scheduler reloading
  - Auto-detects YAML changes every 60s
  - No restart needed for new tasks
  - reload_tasks() method for manual refresh
- Example cost tracking scheduled task
  - Daily API usage report
  - Budget tracking ($5/month target)
  - Disabled by default in scheduled_tasks.yaml

Improvements:
- Fixed tool_use/tool_result pair splitting bug (CRITICAL)
- Added thread safety to agent.chat()
- Fixed N+1 query problem in hybrid search
- Optimized database batch queries
- Added conversation history pruning (50 messages max)

Updated .gitignore:
- Exclude user profiles (memory_workspace/users/*.md)
- Exclude usage data (usage_data.json)
- Exclude vector index (vectors.usearch)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
121
agent.py
121
agent.py
@@ -1,5 +1,6 @@
|
||||
"""AI Agent with Memory and LLM Integration."""
|
||||
|
||||
import threading
|
||||
from typing import List, Optional
|
||||
|
||||
from heartbeat import Heartbeat
|
||||
@@ -12,6 +13,8 @@ from tools import TOOL_DEFINITIONS, execute_tool
|
||||
MAX_CONTEXT_MESSAGES = 3 # Reduced from 5 to save tokens
|
||||
# Maximum characters of agent response to store in memory
|
||||
MEMORY_RESPONSE_PREVIEW_LENGTH = 200
|
||||
# Maximum conversation history entries before pruning
|
||||
MAX_CONVERSATION_HISTORY = 50
|
||||
|
||||
|
||||
class Agent:
|
||||
@@ -27,6 +30,7 @@ class Agent:
|
||||
self.llm = LLMInterface(provider)
|
||||
self.hooks = HooksSystem()
|
||||
self.conversation_history: List[dict] = []
|
||||
self._chat_lock = threading.Lock()
|
||||
|
||||
self.memory.sync()
|
||||
self.hooks.trigger("agent", "startup", {"workspace_dir": workspace_dir})
|
||||
@@ -37,13 +41,88 @@ class Agent:
|
||||
self.heartbeat.on_alert = self._on_heartbeat_alert
|
||||
self.heartbeat.start()
|
||||
|
||||
def _get_context_messages(self, max_messages: int) -> List[dict]:
    """Return the most recent messages without orphaning a tool_result.

    The window is normally the last ``max_messages`` entries of
    ``self.conversation_history``. If the first entry of that window is a
    user message whose content list contains a ``tool_result`` block, the
    window is widened by one entry so the message immediately before it
    (expected to be the assistant message holding the matching
    ``tool_use``) is sent as well. The result can therefore hold up to
    ``max_messages + 1`` entries.

    The window always extends to the end of the history, so a pair can
    only ever be split at the start of the window; no end-of-window check
    is needed.

    Args:
        max_messages: Desired window size. A value of zero or less yields
            an empty list instead of indexing past the end of the history.

    Returns:
        A new list (shallow copy) of message dicts; the stored history is
        never returned directly.
    """
    history = self.conversation_history

    # A non-positive window has no messages; without this guard the index
    # computed below would fall outside the history.
    if max_messages <= 0:
        return []

    if len(history) <= max_messages:
        return list(history)

    # len(history) > max_messages >= 1, so start_idx is in [1, len - 1].
    start_idx = len(history) - max_messages
    first = history[start_idx]

    # tool_result blocks are stored as plain dicts inside a user message's
    # content list; step back one message to keep the pair together.
    if first["role"] == "user":
        blocks = first.get("content")
        if isinstance(blocks, list) and any(
            isinstance(block, dict) and block.get("type") == "tool_result"
            for block in blocks
        ):
            start_idx -= 1

    return list(history[start_idx:])
|
||||
|
||||
def _on_heartbeat_alert(self, message: str) -> None:
    """Print a heartbeat alert to standard output.

    Args:
        message: Alert text supplied by the heartbeat callback.
    """
    alert_text = f"\nHeartbeat Alert:\n{message}\n"
    print(alert_text)
|
||||
|
||||
def _prune_conversation_history(self) -> None:
    """Trim the stored conversation once it exceeds the configured cap.

    Nothing happens while the history holds at most
    ``MAX_CONVERSATION_HISTORY`` entries. Beyond that, only the newest
    ``MAX_CONVERSATION_HISTORY // 2`` entries are kept. If the oldest kept
    entry is a user message carrying a ``tool_result`` block, one more
    entry is retained so the message just before it stays with it.
    """
    history = self.conversation_history
    total = len(history)

    if total > MAX_CONVERSATION_HISTORY:
        # Index of the oldest message that survives the cut.
        cut = total - MAX_CONVERSATION_HISTORY // 2

        if cut > 0:
            oldest_kept = history[cut]
            if oldest_kept["role"] == "user":
                payload = oldest_kept.get("content")
                # Step back one message so a tool_result is not left
                # without the entry that precedes it.
                if isinstance(payload, list) and any(
                    isinstance(item, dict) and item.get("type") == "tool_result"
                    for item in payload
                ):
                    cut -= 1

        self.conversation_history = history[cut:]
|
||||
|
||||
def chat(self, user_message: str, username: str = "default") -> str:
|
||||
"""Chat with context from memory and tool use."""
|
||||
# Handle model switching commands
|
||||
"""Chat with context from memory and tool use.
|
||||
|
||||
Thread-safe: uses a lock to prevent concurrent modification of
|
||||
conversation history from multiple threads (e.g., scheduled tasks
|
||||
and live messages).
|
||||
"""
|
||||
# Handle model switching commands (no lock needed, read-only on history)
|
||||
if user_message.lower().startswith("/model "):
|
||||
model_name = user_message[7:].strip()
|
||||
self.llm.set_model(model_name)
|
||||
@@ -66,9 +145,14 @@ class Agent:
|
||||
f"Commands: /sonnet, /haiku, /status"
|
||||
)
|
||||
|
||||
with self._chat_lock:
|
||||
return self._chat_inner(user_message, username)
|
||||
|
||||
def _chat_inner(self, user_message: str, username: str) -> str:
|
||||
"""Inner chat logic, called while holding _chat_lock."""
|
||||
soul = self.memory.get_soul()
|
||||
user_profile = self.memory.get_user(username)
|
||||
relevant_memory = self.memory.search(user_message, max_results=2)
|
||||
relevant_memory = self.memory.search_hybrid(user_message, max_results=2)
|
||||
|
||||
memory_lines = [f"- {mem['snippet']}" for mem in relevant_memory]
|
||||
system = (
|
||||
@@ -82,18 +166,29 @@ class Agent:
|
||||
{"role": "user", "content": user_message}
|
||||
)
|
||||
|
||||
# Prune history to prevent unbounded growth
|
||||
self._prune_conversation_history()
|
||||
|
||||
# Tool execution loop
|
||||
max_iterations = 5 # Reduced from 10 to save costs
|
||||
# Enable caching for Sonnet to save 90% on repeated system prompts
|
||||
use_caching = "sonnet" in self.llm.model.lower()
|
||||
|
||||
for iteration in range(max_iterations):
|
||||
response = self.llm.chat_with_tools(
|
||||
self.conversation_history[-MAX_CONTEXT_MESSAGES:],
|
||||
tools=TOOL_DEFINITIONS,
|
||||
system=system,
|
||||
use_cache=use_caching,
|
||||
)
|
||||
# Get recent messages, ensuring we don't break tool_use/tool_result pairs
|
||||
context_messages = self._get_context_messages(MAX_CONTEXT_MESSAGES)
|
||||
|
||||
try:
|
||||
response = self.llm.chat_with_tools(
|
||||
context_messages,
|
||||
tools=TOOL_DEFINITIONS,
|
||||
system=system,
|
||||
use_cache=use_caching,
|
||||
)
|
||||
except Exception as e:
|
||||
error_msg = f"LLM API error: {e}"
|
||||
print(f"[Agent] {error_msg}")
|
||||
return f"Sorry, I encountered an error communicating with the AI model. Please try again."
|
||||
|
||||
# Check stop reason
|
||||
if response.stop_reason == "end_turn":
|
||||
@@ -104,6 +199,11 @@ class Agent:
|
||||
text_content.append(block.text)
|
||||
|
||||
final_response = "\n".join(text_content)
|
||||
|
||||
# Handle empty response
|
||||
if not final_response.strip():
|
||||
final_response = "(No response generated)"
|
||||
|
||||
self.conversation_history.append(
|
||||
{"role": "assistant", "content": final_response}
|
||||
)
|
||||
@@ -146,6 +246,9 @@ class Agent:
|
||||
tool_results = []
|
||||
for tool_use in tool_uses:
|
||||
result = execute_tool(tool_use.name, tool_use.input)
|
||||
# Truncate large tool outputs to prevent token explosion
|
||||
if len(result) > 5000:
|
||||
result = result[:5000] + "\n... (output truncated)"
|
||||
print(f"[Tool] {tool_use.name}: {result[:100]}...")
|
||||
tool_results.append({
|
||||
"type": "tool_result",
|
||||
|
||||
Reference in New Issue
Block a user