Improve timeout error handling with actionable feedback
**Problem**: A user was frustrated that the 10-minute timeout returned the unhelpful generic message "task may be too complex" when the task "create a repo for the dhcp course" timed out after 80 messages.

**Solution**: Enhanced the timeout error to provide:
- Progress info (message count, last tool used)
- Complexity indicator (number of different tools)
- Actionable suggestions (break into sub-tasks, use delegate_task)

**Changes**:
- Track _last_message_count and _last_tool_names as instance variables (they survive a timeout, unlike local variables in the canceled async function)
- Update the tracking variables in the message loop
- Build a multi-line error message with a progress summary and suggestions
- Use chr(10) for newlines to avoid string literal corruption

**Impact**: Users now get helpful guidance instead of a generic error when complex tasks time out, including a suggestion to use the new delegate_task tool for parallel work.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -257,7 +257,27 @@ class LLMInterface:
|
||||
except TimeoutError:
|
||||
logger.error("[LLM] ⚠️ Agent SDK call TIMED OUT after 600 seconds!")
|
||||
future.cancel() # Cancel the coroutine
|
||||
raise TimeoutError("Agent SDK call exceeded 10 minute timeout - task may be too complex")
|
||||
|
||||
# Build helpful timeout message with progress info
|
||||
msg_count = getattr(self, '_last_message_count', 0)
|
||||
tools_used = getattr(self, '_last_tool_names', [])
|
||||
|
||||
error_parts = [f"Task timed out after 10 minutes ({msg_count} messages processed)"]
|
||||
|
||||
if tools_used:
|
||||
unique = list(dict.fromkeys(tools_used))
|
||||
last_tool = unique[-1] if unique else 'unknown'
|
||||
error_parts.append(f"Last tool used: {last_tool}")
|
||||
if len(unique) > 3:
|
||||
error_parts.append(f"Used {len(unique)} different tools - this is a complex multi-step task")
|
||||
|
||||
error_parts.append("") # blank line
|
||||
error_parts.append("Suggestions:")
|
||||
error_parts.append("- Break this into smaller, focused sub-tasks")
|
||||
error_parts.append("- Use 'delegate_task' tool to run parts in parallel")
|
||||
error_parts.append("- Ask me to retry with a more specific scope")
|
||||
|
||||
raise TimeoutError(chr(10).join(error_parts))
|
||||
else:
|
||||
logger.info(
|
||||
"[LLM] _run_async_from_thread: using asyncio.run() fallback "
|
||||
@@ -389,6 +409,8 @@ class LLMInterface:
|
||||
"gitea_list_files",
|
||||
"gitea_search_code",
|
||||
"gitea_get_tree",
|
||||
# Sub-agent delegation
|
||||
"delegate_task",
|
||||
]
|
||||
|
||||
# Conditionally add Obsidian MCP server
|
||||
@@ -553,51 +575,51 @@ class LLMInterface:
|
||||
|
||||
# --- 4. Consume messages until we get a ResultMessage. ---
|
||||
result_text = ""
|
||||
assistant_messages = [] # Collect assistant responses
|
||||
assistant_messages = []
|
||||
tool_names = []
|
||||
message_count = 0
|
||||
|
||||
# Track progress for timeout reporting (instance vars survive timeout)
|
||||
self._last_message_count = 0
|
||||
self._last_tool_names = []
|
||||
|
||||
async for data in query_obj.receive_messages():
|
||||
message = parse_message(data)
|
||||
message_count += 1
|
||||
self._last_message_count = message_count
|
||||
|
||||
# Log all message types for debugging hangs
|
||||
message_type = type(message).__name__
|
||||
logger.debug(f"[LLM] Received message #{message_count}: {message_type}")
|
||||
|
||||
# Collect text from AssistantMessage objects
|
||||
if isinstance(message, AssistantMessage):
|
||||
logger.debug(f"[LLM] AssistantMessage: has_content={hasattr(message, 'content')}")
|
||||
if hasattr(message, 'content') and message.content:
|
||||
# Extract text from content blocks
|
||||
if isinstance(message.content, str):
|
||||
assistant_messages.append(message.content)
|
||||
logger.debug(f"[LLM] → Collected string: {len(message.content)} chars")
|
||||
elif isinstance(message.content, list):
|
||||
for block in message.content:
|
||||
if hasattr(block, 'type') and block.type == 'text':
|
||||
if hasattr(block, 'text'):
|
||||
assistant_messages.append(block.text)
|
||||
logger.debug(f"[LLM] → Collected text block: {len(block.text)} chars")
|
||||
else:
|
||||
logger.debug(f"[LLM] → AssistantMessage has no content or empty")
|
||||
if isinstance(message, AssistantMessage) and hasattr(message, 'content'):
|
||||
if isinstance(message.content, str):
|
||||
assistant_messages.append(message.content)
|
||||
elif isinstance(message.content, list):
|
||||
for block in message.content:
|
||||
if hasattr(block, 'type'):
|
||||
if block.type == 'text' and hasattr(block, 'text'):
|
||||
assistant_messages.append(block.text)
|
||||
elif block.type == 'tool_use' and hasattr(block, 'name'):
|
||||
tool_names.append(block.name)
|
||||
self._last_tool_names = tool_names.copy()
|
||||
|
||||
if isinstance(message, ResultMessage):
|
||||
# Use ResultMessage.result if available, otherwise use collected assistant messages
|
||||
result_text = message.result or "\n".join(assistant_messages)
|
||||
logger.info(
|
||||
"[LLM] Agent SDK result received after %d messages: cost=$%.4f, turns=%s",
|
||||
message_count,
|
||||
getattr(message, "total_cost_usd", 0),
|
||||
getattr(message, "num_turns", "?"),
|
||||
)
|
||||
if not message.result and assistant_messages:
|
||||
logger.info(f"[LLM] ResultMessage.result was empty, using {len(assistant_messages)} collected assistant messages")
|
||||
elif not message.result and not assistant_messages:
|
||||
logger.warning(f"[LLM] PROBLEM: Both ResultMessage.result and assistant_messages are empty!")
|
||||
|
||||
if not result_text and tool_names:
|
||||
unique = list(dict.fromkeys(tool_names))
|
||||
summary = ", ".join(unique[:10])
|
||||
if len(unique) > 10:
|
||||
summary += f" (+{len(unique)-10} more)"
|
||||
result_text = f"Task completed: {len(tool_names)} tool calls ({summary}). Cost: ${getattr(message, 'total_cost_usd', 0):.2f}"
|
||||
elif not result_text:
|
||||
result_text = f"Task completed ({message_count} messages, ${getattr(message, 'total_cost_usd', 0):.2f})"
|
||||
|
||||
logger.info("[LLM] Completed: %d msgs, $%.2f, %s turns",
|
||||
message_count, getattr(message, "total_cost_usd", 0),
|
||||
getattr(message, "num_turns", "?"))
|
||||
break
|
||||
|
||||
# Log non-result messages to detect loops
|
||||
if message_count % 10 == 0:
|
||||
logger.warning(f"[LLM] Still waiting for ResultMessage after {message_count} messages...")
|
||||
if message_count % 20 == 0:
|
||||
logger.warning(f"[LLM] Waiting for result... ({message_count} messages)")
|
||||
|
||||
|
||||
# Now that we have the result, close stdin gracefully.
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user