"""AI Agent with Memory and LLM Integration."""

import threading
from typing import List, Optional

from hooks import HooksSystem
from llm_interface import LLMInterface
from memory_system import MemorySystem
from self_healing import SelfHealingSystem
from tools import TOOL_DEFINITIONS, execute_tool

# Maximum number of recent messages to include in LLM context
MAX_CONTEXT_MESSAGES = 20  # Optimized for Agent SDK flat-rate subscription

# Maximum characters of agent response to store in memory
MEMORY_RESPONSE_PREVIEW_LENGTH = 500  # Store more context for better memory retrieval

# Maximum conversation history entries before pruning
MAX_CONVERSATION_HISTORY = 100  # Higher limit with flat-rate subscription

# Maximum tool execution iterations (generous limit for complex operations like zettelkasten)
MAX_TOOL_ITERATIONS = 30  # Allows complex multi-step workflows with auto-linking, hybrid search, etc.

# Number of hybrid memory search results injected into the system prompt
# (also reported by /status, so the two cannot drift apart)
MEMORY_SEARCH_RESULTS = 5


class Agent:
    """AI Agent with memory, LLM, and hooks."""

    def __init__(
        self,
        provider: str = "claude",
        workspace_dir: str = "./memory_workspace",
        is_sub_agent: bool = False,
        specialist_prompt: Optional[str] = None,
    ) -> None:
        self.memory = MemorySystem(workspace_dir)
        self.llm = LLMInterface(provider)
        self.hooks = HooksSystem()
        self.conversation_history: List[dict] = []
        self._chat_lock = threading.Lock()
        self.healing_system = SelfHealingSystem(self.memory, self)

        # Sub-agent orchestration
        self.is_sub_agent = is_sub_agent
        self.specialist_prompt = specialist_prompt
        self.sub_agents: dict = {}  # Cache for spawned sub-agents

        self.memory.sync()
        if not is_sub_agent:  # Only trigger startup hooks for the main agent
            self.hooks.trigger("agent", "startup", {"workspace_dir": workspace_dir})

    def spawn_sub_agent(
        self,
        specialist_prompt: str,
        agent_id: Optional[str] = None,
        share_memory: bool = True,
    ) -> 'Agent':
        """Spawn a sub-agent with a specialized system prompt.

        Args:
            specialist_prompt: Custom system prompt for the specialist
            agent_id: Optional ID for caching (reuses the same specialist)
            share_memory: Whether to share the memory workspace with the main agent

        Returns:
            Agent instance configured as a specialist

        Example:
            sub = agent.spawn_sub_agent(
                specialist_prompt="You are a zettelkasten expert. Focus ONLY on note-taking.",
                agent_id="zettelkasten_processor",
            )
            result = sub.chat("Process my fleeting notes", username="jordan")
        """
        # Return the cached specialist if this agent_id was spawned before
        if agent_id and agent_id in self.sub_agents:
            return self.sub_agents[agent_id]

        # Create a new sub-agent; an isolated workspace needs a directory name,
        # so fall back to a simple counter when no agent_id is given
        if share_memory:
            workspace = self.memory.workspace_dir
        else:
            sub_dir = agent_id or f"sub_{len(self.sub_agents)}"
            workspace = f"{self.memory.workspace_dir}/sub_agents/{sub_dir}"

        sub_agent = Agent(
            provider=self.llm.provider,
            workspace_dir=workspace,
            is_sub_agent=True,
            specialist_prompt=specialist_prompt,
        )

        # Cache if an ID was provided
        if agent_id:
            self.sub_agents[agent_id] = sub_agent

        return sub_agent

    def delegate(
        self,
        task: str,
        specialist_prompt: str,
        username: str = "default",
        agent_id: Optional[str] = None,
    ) -> str:
        """Delegate a task to a specialist sub-agent (convenience method).

        Args:
            task: The task/message to send to the specialist
            specialist_prompt: System prompt defining the specialist's role
            username: Username for context
            agent_id: Optional ID for caching the specialist

        Returns:
            Response from the specialist

        Example:
            # One-off delegation
            result = agent.delegate(
                task="Process my fleeting notes and find connections",
                specialist_prompt="You are a zettelkasten expert. Focus on note organization and linking.",
                username="jordan",
            )

            # Cached specialist (reused across multiple calls)
            result = agent.delegate(
                task="Summarize my emails from today",
                specialist_prompt="You are an email analyst. Focus on extracting key information.",
                username="jordan",
                agent_id="email_analyst",
            )
        """
        sub_agent = self.spawn_sub_agent(specialist_prompt, agent_id=agent_id)
        return sub_agent.chat(task, username=username)

    def _get_context_messages(self, max_messages: int) -> List[dict]:
        """Get recent messages without breaking tool_use/tool_result pairs.

        Ensures that:
        1. A tool_result message always has its preceding tool_use message
        2. A tool_use message always has its following tool_result message
        3. The first message is never a tool_result without its tool_use
        """
        if len(self.conversation_history) <= max_messages:
            return list(self.conversation_history)

        # Start with the most recent messages
        start_idx = len(self.conversation_history) - max_messages

        # If the slice would start on a tool_result, step back one message so
        # the assistant tool_use that produced it is included
        if start_idx > 0:
            candidate = self.conversation_history[start_idx]
            if candidate["role"] == "user" and isinstance(candidate.get("content"), list):
                if any(isinstance(block, dict) and block.get("type") == "tool_result"
                       for block in candidate["content"]):
                    start_idx -= 1

        # The slice always runs to the end of the history, so a tool pair can
        # never be split at the end; only the start needs adjustment.
        return list(self.conversation_history[start_idx:])

    def _prune_conversation_history(self) -> None:
        """Prune conversation history to prevent unbounded growth.

        Removes oldest messages while preserving tool_use/tool_result pairs.
        """
        if len(self.conversation_history) <= MAX_CONVERSATION_HISTORY:
            return

        # Keep the most recent half
        keep_count = MAX_CONVERSATION_HISTORY // 2
        start_idx = len(self.conversation_history) - keep_count

        # Ensure we don't split a tool pair
        if start_idx > 0:
            candidate = self.conversation_history[start_idx]
            if candidate["role"] == "user" and isinstance(candidate.get("content"), list):
                if any(isinstance(block, dict) and block.get("type") == "tool_result"
                       for block in candidate["content"]):
                    start_idx -= 1

        self.conversation_history = self.conversation_history[start_idx:]

    def chat(self, user_message: str, username: str = "default") -> str:
        """Chat with context from memory and tool use.

        Thread-safe: uses a lock to prevent concurrent modification of the
        conversation history from multiple threads (e.g., scheduled tasks
        and live messages).
        """
        # Handle model-switching commands (no lock needed; read-only on history)
        if user_message.lower().startswith("/model "):
            model_name = user_message[7:].strip()
            self.llm.set_model(model_name)
            return f"Switched to model: {model_name}"
        elif user_message.lower() == "/sonnet":
            self.llm.set_model("claude-sonnet-4-5-20250929")
            return "Switched to Claude Sonnet 4.5 (more capable, higher cost)"
        elif user_message.lower() == "/haiku":
            self.llm.set_model("claude-haiku-4-5-20251001")
            return "Switched to Claude Haiku 4.5 (faster, cheaper)"
        elif user_message.lower() == "/status":
            current_model = self.llm.model
            is_sonnet = "sonnet" in current_model.lower()
            cache_status = "enabled" if is_sonnet else "disabled (Haiku active)"
            return (
                f"Current model: {current_model}\n"
                f"Prompt caching: {cache_status}\n"
                f"Context messages: {MAX_CONTEXT_MESSAGES}\n"
                f"Memory results: {MEMORY_SEARCH_RESULTS}\n\n"
                f"Commands: /sonnet, /haiku, /status"
            )

        with self._chat_lock:
            return self._chat_inner(user_message, username)

    def _chat_inner(self, user_message: str, username: str) -> str:
        """Inner chat logic, called while holding _chat_lock."""
        # Use the specialist prompt if this is a sub-agent, otherwise build full context
        if self.specialist_prompt:
            # Sub-agent: use the focused specialist prompt
            system = (
                f"{self.specialist_prompt}\n\n"
                f"You have access to {len(TOOL_DEFINITIONS)} tools. Use them to accomplish your specialized task. "
                f"Stay focused on your specialty and complete the task efficiently."
            )
        else:
            # Main agent: use the full SOUL, user profile, and memory context
            soul = self.memory.get_soul()
            user_profile = self.memory.get_user(username)
            relevant_memory = self.memory.search_hybrid(user_message, max_results=MEMORY_SEARCH_RESULTS)

            memory_lines = [f"- {mem['snippet']}" for mem in relevant_memory]
            system = (
                f"{soul}\n\nUser Profile:\n{user_profile}\n\n"
                f"Relevant Memory:\n" + "\n".join(memory_lines) +
                f"\n\nYou have access to {len(TOOL_DEFINITIONS)} tools for file operations, "
                f"command execution, and Google services. Use them freely to help the user. "
                f"Note: You're running on a flat-rate Agent SDK subscription, so don't worry "
                f"about API costs when making multiple tool calls or processing large contexts."
            )

        self.conversation_history.append(
            {"role": "user", "content": user_message}
        )

        # Prune history to prevent unbounded growth
        self._prune_conversation_history()

        # Tool execution loop
        max_iterations = MAX_TOOL_ITERATIONS
        # Enable caching for Sonnet to save 90% on repeated system prompts
        use_caching = "sonnet" in self.llm.model.lower()

        for iteration in range(max_iterations):
            # Get recent messages, ensuring we don't break tool_use/tool_result pairs
            context_messages = self._get_context_messages(MAX_CONTEXT_MESSAGES)

            try:
                response = self.llm.chat_with_tools(
                    context_messages,
                    tools=TOOL_DEFINITIONS,
                    system=system,
                    use_cache=use_caching,
                )
            except Exception as e:
                error_msg = f"LLM API error: {e}"
                print(f"[Agent] {error_msg}")
                self.healing_system.capture_error(
                    error=e,
                    component="agent.py:_chat_inner",
                    intent="Calling LLM API for chat response",
                    context={
                        "model": self.llm.model,
                        "message_preview": user_message[:100],
                        "iteration": iteration,
                    },
                )
                return "Sorry, I encountered an error communicating with the AI model. Please try again."

            # Check stop reason
            if response.stop_reason == "end_turn":
                # Extract the text response
                text_content = []
                for block in response.content:
                    if block.type == "text":
                        text_content.append(block.text)

                final_response = "\n".join(text_content)

                # Handle empty responses
                if not final_response.strip():
                    final_response = "(No response generated)"

                self.conversation_history.append(
                    {"role": "assistant", "content": final_response}
                )

                # Store a preview of the exchange in daily memory; only mark
                # truncation when the response was actually cut off
                preview = final_response[:MEMORY_RESPONSE_PREVIEW_LENGTH]
                if len(final_response) > MEMORY_RESPONSE_PREVIEW_LENGTH:
                    preview += "..."
                self.memory.write_memory(
                    f"**{username}**: {user_message}\n"
                    f"**Garvis**: {preview}",
                    daily=True,
                )

                return final_response

            elif response.stop_reason == "tool_use":
                # Build the assistant message, preserving both text and tool_use blocks
                assistant_content = []
                tool_uses = []

                for block in response.content:
                    if block.type == "text":
                        assistant_content.append({
                            "type": "text",
                            "text": block.text
                        })
                    elif block.type == "tool_use":
                        assistant_content.append({
                            "type": "tool_use",
                            "id": block.id,
                            "name": block.name,
                            "input": block.input
                        })
                        tool_uses.append(block)

                self.conversation_history.append({
                    "role": "assistant",
                    "content": assistant_content
                })

                # Execute the requested tools and build the tool_result message
                tool_results = []
                for tool_use in tool_uses:
                    result = execute_tool(tool_use.name, tool_use.input, healing_system=self.healing_system)
                    # Truncate large tool outputs to prevent token explosion
                    if len(result) > 5000:
                        result = result[:5000] + "\n... (output truncated)"
                    print(f"[Tool] {tool_use.name}: {result[:100]}...")
                    tool_results.append({
                        "type": "tool_result",
                        "tool_use_id": tool_use.id,
                        "content": result
                    })

                self.conversation_history.append({
                    "role": "user",
                    "content": tool_results
                })
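                # The loop then continues: the next iteration sends these tool
                # results back to the model, which either requests more tools
                # or ends its turn with a final text response.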

            else:
                # Unexpected stop reason
                return f"Unexpected stop reason: {response.stop_reason}"

        return "Error: Maximum tool use iterations exceeded"

    def switch_model(self, provider: str) -> None:
        """Switch LLM provider."""
        self.llm = LLMInterface(provider)

    def shutdown(self) -> None:
        """Cleanup and stop background services."""
        self.memory.close()
        self.hooks.trigger("agent", "shutdown", {})


if __name__ == "__main__":
    agent = Agent(provider="claude")
    response = agent.chat("What's my current project?", username="alice")
    print(response)
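
    # A minimal sketch of delegating to a specialist sub-agent via the
    # delegate() convenience method defined above. Illustrative only: the
    # task and prompt are made up, and a configured LLM provider is assumed.
    summary = agent.delegate(
        task="Summarize today's notes",
        specialist_prompt="You are a note summarizer. Be concise.",
        username="alice",
        agent_id="summarizer",
    )
    print(summary)

    agent.shutdown()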