"""AI Agent with Memory and LLM Integration.""" import threading from typing import List, Optional from heartbeat import Heartbeat from hooks import HooksSystem from llm_interface import LLMInterface from memory_system import MemorySystem from self_healing import SelfHealingSystem from tools import TOOL_DEFINITIONS, execute_tool # Maximum number of recent messages to include in LLM context MAX_CONTEXT_MESSAGES = 3 # Reduced from 5 to save tokens # Maximum characters of agent response to store in memory MEMORY_RESPONSE_PREVIEW_LENGTH = 200 # Maximum conversation history entries before pruning MAX_CONVERSATION_HISTORY = 50 class Agent: """AI Agent with memory, LLM, heartbeat, and hooks.""" def __init__( self, provider: str = "claude", workspace_dir: str = "./memory_workspace", enable_heartbeat: bool = False, ) -> None: self.memory = MemorySystem(workspace_dir) self.llm = LLMInterface(provider) self.hooks = HooksSystem() self.conversation_history: List[dict] = [] self._chat_lock = threading.Lock() self.healing_system = SelfHealingSystem(self.memory, self) self.memory.sync() self.hooks.trigger("agent", "startup", {"workspace_dir": workspace_dir}) self.heartbeat: Optional[Heartbeat] = None if enable_heartbeat: self.heartbeat = Heartbeat(self.memory, self.llm) self.heartbeat.on_alert = self._on_heartbeat_alert self.heartbeat.start() def _get_context_messages(self, max_messages: int) -> List[dict]: """Get recent messages without breaking tool_use/tool_result pairs. Ensures that: 1. A tool_result message always has its preceding tool_use message 2. A tool_use message always has its following tool_result message 3. The first message is never a tool_result without its tool_use """ if len(self.conversation_history) <= max_messages: return list(self.conversation_history) # Start with the most recent messages start_idx = len(self.conversation_history) - max_messages # Track original start_idx before adjustments for end-of-list check original_start_idx = start_idx # Check if we split a tool pair at the start if start_idx > 0: candidate = self.conversation_history[start_idx] # If first message is a tool_result, include the tool_use before it if candidate["role"] == "user" and isinstance(candidate.get("content"), list): if any(isinstance(block, dict) and block.get("type") == "tool_result" for block in candidate["content"]): start_idx -= 1 # Build result slice using adjusted start result = list(self.conversation_history[start_idx:]) # Check if we split a tool pair at the end # Use original_start_idx + max_messages to find end of original slice original_end_idx = original_start_idx + max_messages if original_end_idx < len(self.conversation_history): end_msg = self.conversation_history[original_end_idx - 1] if end_msg["role"] == "assistant" and isinstance(end_msg.get("content"), list): has_tool_use = any( (hasattr(block, "type") and block.type == "tool_use") or (isinstance(block, dict) and block.get("type") == "tool_use") for block in end_msg["content"] ) if has_tool_use: # The tool_result at original_end_idx is already in result # if start_idx was adjusted, so only add if it's not there next_msg = self.conversation_history[original_end_idx] if next_msg not in result: result.append(next_msg) return result def _on_heartbeat_alert(self, message: str) -> None: """Handle heartbeat alerts.""" print(f"\nHeartbeat Alert:\n{message}\n") def _prune_conversation_history(self) -> None: """Prune conversation history to prevent unbounded growth. Removes oldest messages while preserving tool_use/tool_result pairs. """ if len(self.conversation_history) <= MAX_CONVERSATION_HISTORY: return # Keep the most recent half keep_count = MAX_CONVERSATION_HISTORY // 2 start_idx = len(self.conversation_history) - keep_count # Ensure we don't split a tool pair if start_idx > 0: candidate = self.conversation_history[start_idx] if candidate["role"] == "user" and isinstance(candidate.get("content"), list): if any(isinstance(block, dict) and block.get("type") == "tool_result" for block in candidate["content"]): start_idx -= 1 self.conversation_history = self.conversation_history[start_idx:] def chat(self, user_message: str, username: str = "default") -> str: """Chat with context from memory and tool use. Thread-safe: uses a lock to prevent concurrent modification of conversation history from multiple threads (e.g., scheduled tasks and live messages). """ # Handle model switching commands (no lock needed, read-only on history) if user_message.lower().startswith("/model "): model_name = user_message[7:].strip() self.llm.set_model(model_name) return f"Switched to model: {model_name}" elif user_message.lower() == "/sonnet": self.llm.set_model("claude-sonnet-4-5-20250929") return "Switched to Claude Sonnet 4.5 (more capable, higher cost)" elif user_message.lower() == "/haiku": self.llm.set_model("claude-haiku-4-5-20251001") return "Switched to Claude Haiku 4.5 (faster, cheaper)" elif user_message.lower() == "/status": current_model = self.llm.model is_sonnet = "sonnet" in current_model.lower() cache_status = "enabled" if is_sonnet else "disabled (Haiku active)" return ( f"Current model: {current_model}\n" f"Prompt caching: {cache_status}\n" f"Context messages: {MAX_CONTEXT_MESSAGES}\n" f"Memory results: 2\n\n" f"Commands: /sonnet, /haiku, /status" ) with self._chat_lock: return self._chat_inner(user_message, username) def _chat_inner(self, user_message: str, username: str) -> str: """Inner chat logic, called while holding _chat_lock.""" soul = self.memory.get_soul() user_profile = self.memory.get_user(username) relevant_memory = self.memory.search_hybrid(user_message, max_results=2) memory_lines = [f"- {mem['snippet']}" for mem in relevant_memory] system = ( f"{soul}\n\nUser Profile:\n{user_profile}\n\n" f"Relevant Memory:\n" + "\n".join(memory_lines) + f"\n\nYou have access to tools for file operations and command execution. " f"Use them freely to help the user." ) self.conversation_history.append( {"role": "user", "content": user_message} ) # Prune history to prevent unbounded growth self._prune_conversation_history() # Tool execution loop max_iterations = 5 # Reduced from 10 to save costs # Enable caching for Sonnet to save 90% on repeated system prompts use_caching = "sonnet" in self.llm.model.lower() for iteration in range(max_iterations): # Get recent messages, ensuring we don't break tool_use/tool_result pairs context_messages = self._get_context_messages(MAX_CONTEXT_MESSAGES) try: response = self.llm.chat_with_tools( context_messages, tools=TOOL_DEFINITIONS, system=system, use_cache=use_caching, ) except Exception as e: error_msg = f"LLM API error: {e}" print(f"[Agent] {error_msg}") self.healing_system.capture_error( error=e, component="agent.py:_chat_inner", intent="Calling LLM API for chat response", context={ "model": self.llm.model, "message_preview": user_message[:100], "iteration": iteration, }, ) return f"Sorry, I encountered an error communicating with the AI model. Please try again." # Check stop reason if response.stop_reason == "end_turn": # Extract text response text_content = [] for block in response.content: if block.type == "text": text_content.append(block.text) final_response = "\n".join(text_content) # Handle empty response if not final_response.strip(): final_response = "(No response generated)" self.conversation_history.append( {"role": "assistant", "content": final_response} ) preview = final_response[:MEMORY_RESPONSE_PREVIEW_LENGTH] self.memory.write_memory( f"**User ({username})**: {user_message}\n" f"**Agent**: {preview}...", daily=True, ) return final_response elif response.stop_reason == "tool_use": # Build assistant message with tool uses assistant_content = [] tool_uses = [] for block in response.content: if block.type == "text": assistant_content.append({ "type": "text", "text": block.text }) elif block.type == "tool_use": assistant_content.append({ "type": "tool_use", "id": block.id, "name": block.name, "input": block.input }) tool_uses.append(block) self.conversation_history.append({ "role": "assistant", "content": assistant_content }) # Execute tools and build tool result message tool_results = [] for tool_use in tool_uses: result = execute_tool(tool_use.name, tool_use.input, healing_system=self.healing_system) # Truncate large tool outputs to prevent token explosion if len(result) > 5000: result = result[:5000] + "\n... (output truncated)" print(f"[Tool] {tool_use.name}: {result[:100]}...") tool_results.append({ "type": "tool_result", "tool_use_id": tool_use.id, "content": result }) self.conversation_history.append({ "role": "user", "content": tool_results }) else: # Unexpected stop reason return f"Unexpected stop reason: {response.stop_reason}" return "Error: Maximum tool use iterations exceeded" def switch_model(self, provider: str) -> None: """Switch LLM provider.""" self.llm = LLMInterface(provider) if self.heartbeat: self.heartbeat.llm = self.llm def shutdown(self) -> None: """Cleanup and stop background services.""" if self.heartbeat: self.heartbeat.stop() self.memory.close() self.hooks.trigger("agent", "shutdown", {}) if __name__ == "__main__": agent = Agent(provider="claude") response = agent.chat("What's my current project?", username="alice") print(response)