Improve timeout error handling with actionable feedback
**Problem**: A user was frustrated that the 10-minute timeout returned the unhelpful generic message "task may be too complex" when the task "create a repo for the dhcp course" timed out after 80 messages.

**Solution**: Enhanced the timeout error to provide:
- Progress info (message count, last tool used)
- Complexity indicator (number of different tools)
- Actionable suggestions (break into sub-tasks, use delegate_task)

**Changes**:
- Track _last_message_count and _last_tool_names as instance vars (they survive the timeout, unlike local vars in the canceled async function)
- Update tracking variables in message loop
- Build multi-line error message with progress summary and suggestions
- Use chr(10) for newlines to avoid string literal corruption

**Impact**: Users now get helpful guidance instead of a generic error when complex tasks time out, including a suggestion to use the new delegate_task tool for parallel work.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -257,7 +257,27 @@ class LLMInterface:
|
|||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
logger.error("[LLM] ⚠️ Agent SDK call TIMED OUT after 600 seconds!")
|
logger.error("[LLM] ⚠️ Agent SDK call TIMED OUT after 600 seconds!")
|
||||||
future.cancel() # Cancel the coroutine
|
future.cancel() # Cancel the coroutine
|
||||||
raise TimeoutError("Agent SDK call exceeded 10 minute timeout - task may be too complex")
|
|
||||||
|
# Build helpful timeout message with progress info
|
||||||
|
msg_count = getattr(self, '_last_message_count', 0)
|
||||||
|
tools_used = getattr(self, '_last_tool_names', [])
|
||||||
|
|
||||||
|
error_parts = [f"Task timed out after 10 minutes ({msg_count} messages processed)"]
|
||||||
|
|
||||||
|
if tools_used:
|
||||||
|
unique = list(dict.fromkeys(tools_used))
|
||||||
|
last_tool = unique[-1] if unique else 'unknown'
|
||||||
|
error_parts.append(f"Last tool used: {last_tool}")
|
||||||
|
if len(unique) > 3:
|
||||||
|
error_parts.append(f"Used {len(unique)} different tools - this is a complex multi-step task")
|
||||||
|
|
||||||
|
error_parts.append("") # blank line
|
||||||
|
error_parts.append("Suggestions:")
|
||||||
|
error_parts.append("- Break this into smaller, focused sub-tasks")
|
||||||
|
error_parts.append("- Use 'delegate_task' tool to run parts in parallel")
|
||||||
|
error_parts.append("- Ask me to retry with a more specific scope")
|
||||||
|
|
||||||
|
raise TimeoutError(chr(10).join(error_parts))
|
||||||
else:
|
else:
|
||||||
logger.info(
|
logger.info(
|
||||||
"[LLM] _run_async_from_thread: using asyncio.run() fallback "
|
"[LLM] _run_async_from_thread: using asyncio.run() fallback "
|
||||||
@@ -389,6 +409,8 @@ class LLMInterface:
|
|||||||
"gitea_list_files",
|
"gitea_list_files",
|
||||||
"gitea_search_code",
|
"gitea_search_code",
|
||||||
"gitea_get_tree",
|
"gitea_get_tree",
|
||||||
|
# Sub-agent delegation
|
||||||
|
"delegate_task",
|
||||||
]
|
]
|
||||||
|
|
||||||
# Conditionally add Obsidian MCP server
|
# Conditionally add Obsidian MCP server
|
||||||
@@ -553,51 +575,51 @@ class LLMInterface:
|
|||||||
|
|
||||||
# --- 4. Consume messages until we get a ResultMessage. ---
|
# --- 4. Consume messages until we get a ResultMessage. ---
|
||||||
result_text = ""
|
result_text = ""
|
||||||
assistant_messages = [] # Collect assistant responses
|
assistant_messages = []
|
||||||
|
tool_names = []
|
||||||
message_count = 0
|
message_count = 0
|
||||||
|
|
||||||
|
# Track progress for timeout reporting (instance vars survive timeout)
|
||||||
|
self._last_message_count = 0
|
||||||
|
self._last_tool_names = []
|
||||||
|
|
||||||
async for data in query_obj.receive_messages():
|
async for data in query_obj.receive_messages():
|
||||||
message = parse_message(data)
|
message = parse_message(data)
|
||||||
message_count += 1
|
message_count += 1
|
||||||
|
self._last_message_count = message_count
|
||||||
|
|
||||||
# Log all message types for debugging hangs
|
if isinstance(message, AssistantMessage) and hasattr(message, 'content'):
|
||||||
message_type = type(message).__name__
|
if isinstance(message.content, str):
|
||||||
logger.debug(f"[LLM] Received message #{message_count}: {message_type}")
|
assistant_messages.append(message.content)
|
||||||
|
elif isinstance(message.content, list):
|
||||||
# Collect text from AssistantMessage objects
|
for block in message.content:
|
||||||
if isinstance(message, AssistantMessage):
|
if hasattr(block, 'type'):
|
||||||
logger.debug(f"[LLM] AssistantMessage: has_content={hasattr(message, 'content')}")
|
if block.type == 'text' and hasattr(block, 'text'):
|
||||||
if hasattr(message, 'content') and message.content:
|
assistant_messages.append(block.text)
|
||||||
# Extract text from content blocks
|
elif block.type == 'tool_use' and hasattr(block, 'name'):
|
||||||
if isinstance(message.content, str):
|
tool_names.append(block.name)
|
||||||
assistant_messages.append(message.content)
|
self._last_tool_names = tool_names.copy()
|
||||||
logger.debug(f"[LLM] → Collected string: {len(message.content)} chars")
|
|
||||||
elif isinstance(message.content, list):
|
|
||||||
for block in message.content:
|
|
||||||
if hasattr(block, 'type') and block.type == 'text':
|
|
||||||
if hasattr(block, 'text'):
|
|
||||||
assistant_messages.append(block.text)
|
|
||||||
logger.debug(f"[LLM] → Collected text block: {len(block.text)} chars")
|
|
||||||
else:
|
|
||||||
logger.debug(f"[LLM] → AssistantMessage has no content or empty")
|
|
||||||
|
|
||||||
if isinstance(message, ResultMessage):
|
if isinstance(message, ResultMessage):
|
||||||
# Use ResultMessage.result if available, otherwise use collected assistant messages
|
|
||||||
result_text = message.result or "\n".join(assistant_messages)
|
result_text = message.result or "\n".join(assistant_messages)
|
||||||
logger.info(
|
|
||||||
"[LLM] Agent SDK result received after %d messages: cost=$%.4f, turns=%s",
|
if not result_text and tool_names:
|
||||||
message_count,
|
unique = list(dict.fromkeys(tool_names))
|
||||||
getattr(message, "total_cost_usd", 0),
|
summary = ", ".join(unique[:10])
|
||||||
getattr(message, "num_turns", "?"),
|
if len(unique) > 10:
|
||||||
)
|
summary += f" (+{len(unique)-10} more)"
|
||||||
if not message.result and assistant_messages:
|
result_text = f"Task completed: {len(tool_names)} tool calls ({summary}). Cost: ${getattr(message, 'total_cost_usd', 0):.2f}"
|
||||||
logger.info(f"[LLM] ResultMessage.result was empty, using {len(assistant_messages)} collected assistant messages")
|
elif not result_text:
|
||||||
elif not message.result and not assistant_messages:
|
result_text = f"Task completed ({message_count} messages, ${getattr(message, 'total_cost_usd', 0):.2f})"
|
||||||
logger.warning(f"[LLM] PROBLEM: Both ResultMessage.result and assistant_messages are empty!")
|
|
||||||
|
logger.info("[LLM] Completed: %d msgs, $%.2f, %s turns",
|
||||||
|
message_count, getattr(message, "total_cost_usd", 0),
|
||||||
|
getattr(message, "num_turns", "?"))
|
||||||
break
|
break
|
||||||
|
|
||||||
# Log non-result messages to detect loops
|
if message_count % 20 == 0:
|
||||||
if message_count % 10 == 0:
|
logger.warning(f"[LLM] Waiting for result... ({message_count} messages)")
|
||||||
logger.warning(f"[LLM] Still waiting for ResultMessage after {message_count} messages...")
|
|
||||||
|
|
||||||
# Now that we have the result, close stdin gracefully.
|
# Now that we have the result, close stdin gracefully.
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user