Implement self-healing system Phase 1: Error capture and logging
- Add SelfHealingSystem with error observation infrastructure
- Capture errors with full context: type, message, stack trace, intent, inputs
- Log to MEMORY.md with deduplication (max 3 attempts per error signature)
- Integrate error capture in agent, tools, runtime, and scheduler
- Non-invasive: preserves all existing error handling behavior
- Foundation for future diagnosis and auto-fixing capabilities

Phase 1 of 4-phase rollout - observation only, no auto-fixing yet.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
14
agent.py
14
agent.py
@@ -7,6 +7,7 @@ from heartbeat import Heartbeat
|
||||
from hooks import HooksSystem
|
||||
from llm_interface import LLMInterface
|
||||
from memory_system import MemorySystem
|
||||
from self_healing import SelfHealingSystem
|
||||
from tools import TOOL_DEFINITIONS, execute_tool
|
||||
|
||||
# Maximum number of recent messages to include in LLM context
|
||||
@@ -31,6 +32,7 @@ class Agent:
|
||||
self.hooks = HooksSystem()
|
||||
self.conversation_history: List[dict] = []
|
||||
self._chat_lock = threading.Lock()
|
||||
self.healing_system = SelfHealingSystem(self.memory, self)
|
||||
|
||||
self.memory.sync()
|
||||
self.hooks.trigger("agent", "startup", {"workspace_dir": workspace_dir})
|
||||
@@ -188,6 +190,16 @@ class Agent:
|
||||
except Exception as e:
|
||||
error_msg = f"LLM API error: {e}"
|
||||
print(f"[Agent] {error_msg}")
|
||||
self.healing_system.capture_error(
|
||||
error=e,
|
||||
component="agent.py:_chat_inner",
|
||||
intent="Calling LLM API for chat response",
|
||||
context={
|
||||
"model": self.llm.model,
|
||||
"message_preview": user_message[:100],
|
||||
"iteration": iteration,
|
||||
},
|
||||
)
|
||||
return f"Sorry, I encountered an error communicating with the AI model. Please try again."
|
||||
|
||||
# Check stop reason
|
||||
@@ -245,7 +257,7 @@ class Agent:
|
||||
# Execute tools and build tool result message
|
||||
tool_results = []
|
||||
for tool_use in tool_uses:
|
||||
- result = execute_tool(tool_use.name, tool_use.input)
|
||||
+ result = execute_tool(tool_use.name, tool_use.input, healing_system=self.healing_system)
|
||||
# Truncate large tool outputs to prevent token explosion
|
||||
if len(result) > 5000:
|
||||
result = result[:5000] + "\n... (output truncated)"
|
||||
|
||||
Reference in New Issue
Block a user