diff --git a/.gitignore b/.gitignore index 44c46d8..3cadc1c 100644 --- a/.gitignore +++ b/.gitignore @@ -65,3 +65,4 @@ config/google_oauth_token.json # Logs *.log +logs/ diff --git a/LOGGING.md b/LOGGING.md new file mode 100644 index 0000000..838b650 --- /dev/null +++ b/LOGGING.md @@ -0,0 +1,207 @@ +# Structured Logging System + +## Overview + +Ajarbot now includes a comprehensive structured logging system to track errors, tool executions, and system behavior. + +## Log Files + +All logs are stored in the `logs/` directory (gitignored): + +### 1. `ajarbot.log` - Main Application Log +- **Format**: JSON (one record per line) +- **Level**: DEBUG and above +- **Size**: Rotates at 10MB, keeps 5 backups +- **Contents**: All application events, tool executions, LLM calls + +### 2. `errors.log` - Error-Only Log +- **Format**: JSON +- **Level**: ERROR and CRITICAL only +- **Size**: Rotates at 5MB, keeps 3 backups +- **Contents**: Only errors and critical issues for quick diagnosis + +### 3. `tools.log` - Tool Execution Log +- **Format**: JSON +- **Level**: INFO and above +- **Size**: Rotates at 10MB, keeps 3 backups +- **Contents**: Every tool call with inputs, outputs, duration, and success/failure + +## Log Format + +### JSON Structure +```json +{ + "timestamp": "2026-02-16T12:34:56.789Z", + "level": "ERROR", + "logger": "tools", + "message": "Tool failed: permanent_note", + "module": "tools", + "function": "execute_tool", + "line": 500, + "extra": { + "tool_name": "permanent_note", + "inputs": {"title": "Test", "content": "..."}, + "success": false, + "error": "Unknown tool error", + "duration_ms": 123.45 + } +} +``` + +### Tool Log Example +```json +{ + "timestamp": "2026-02-16T06:00:15.234Z", + "level": "INFO", + "logger": "tools", + "message": "Tool executed: get_weather", + "extra": { + "tool_name": "get_weather", + "inputs": {"location": "Centennial, CO"}, + "success": true, + "result_length": 456, + "duration_ms": 1234.56 + } +} +``` + +## Usage in Code + +### Get a Logger +```python +from logging_config import get_logger, get_tool_logger + +# General logger +logger = get_logger("my_module") + +# Specialized tool logger +tool_logger = get_tool_logger() +``` + +### Logging Methods + +**Basic logging:** +```python +logger.debug("Detailed debug info", key="value") +logger.info("Informational message", user_id=123) +logger.warning("Warning message", issue="something") +logger.error("Error occurred", exc_info=True, error_code="E001") +logger.critical("Critical system failure", exc_info=True) +``` + +**Tool execution logging:** +```python +tool_logger.log_tool_call( + tool_name="permanent_note", + inputs={"title": "Test", "content": "..."}, + success=True, + result="Created note successfully", + duration_ms=123.45 +) +``` + +## Analyzing Logs + +### View Recent Errors +```bash +# Last 20 errors +tail -20 logs/errors.log | jq . + +# Errors from specific module +grep '"module":"tools"' logs/errors.log | jq . +``` + +### Tool Performance Analysis +```bash +# Average tool execution time +cat logs/tools.log | jq -r '.extra.duration_ms' | awk '{sum+=$1; count++} END {print sum/count}' + +# Failed tools +grep '"success":false' logs/tools.log | jq -r '.extra.tool_name' | sort | uniq -c + +# Slowest tool calls +cat logs/tools.log | jq -r '[.extra.tool_name, .extra.duration_ms] | @csv' | sort -t, -k2 -rn | head -10 +``` + +### Find Specific Errors +```bash +# Max token errors +grep -i "max.*token" logs/errors.log | jq . + +# Tool iteration limits +grep -i "iteration.*exceeded" logs/ajarbot.log | jq . + +# MCP tool failures +grep '"tool_name":"permanent_note"' logs/tools.log | grep '"success":false' | jq . +``` + +## Error Patterns to Watch + +1. **Max Tool Iterations** - Search: `"iteration.*exceeded"` +2. **Max Tokens** - Search: `"max.*token"` +3. **MCP Tool Failures** - Search: `"Unknown tool"` or failed MCP tool names +4. **Slow Tools** - Tools taking > 5000ms +5. **Repeated Failures** - Same tool failing multiple times + +## Maintenance + +### Log Rotation +Logs automatically rotate when they reach size limits: +- `ajarbot.log`: 10MB → keeps 5 old files (50MB total) +- `errors.log`: 5MB → keeps 3 old files (15MB total) +- `tools.log`: 10MB → keeps 3 old files (30MB total) + +Total max disk usage: ~95MB + +### Manual Cleanup +```bash +# Remove old logs +rm logs/*.log.* + +# Clear all logs (careful!) +rm logs/*.log +``` + +## Integration + +### Automatic Integration +The logging system is automatically integrated into: +- ✅ `tools.py` - All tool executions logged +- ✅ Console output - Human-readable format +- ✅ File logs - JSON format for parsing + +### Adding Logging to New Modules +```python +from logging_config import get_logger + +logger = get_logger(__name__) + +def my_function(): + logger.info("Starting operation", operation_id=123) + try: + # Do work + logger.debug("Step completed", step=1) + except Exception as e: + logger.error("Operation failed", exc_info=True, operation_id=123) +``` + +## Benefits + +1. **Quick Error Diagnosis**: Separate `errors.log` for immediate issue identification +2. **Performance Tracking**: Tool execution times and success rates +3. **Historical Analysis**: JSON format enables programmatic analysis +4. **Debugging**: Full context with inputs, outputs, and stack traces +5. **Monitoring**: Easy to parse logs for alerting systems + +## Future Enhancements + +- [ ] Web dashboard for log visualization +- [ ] Real-time log streaming via WebSocket +- [ ] Automatic error rate alerts (email/Telegram) +- [ ] Integration with external monitoring (Datadog, CloudWatch) +- [ ] Log aggregation for multi-instance deployments + +--- + +**Last Updated:** 2026-02-16 +**Log System Version:** 1.0 diff --git a/logging_config.py b/logging_config.py new file mode 100644 index 0000000..8e23573 --- /dev/null +++ b/logging_config.py @@ -0,0 +1,203 @@ +""" +Structured logging configuration for Ajarbot. + +Provides consistent logging across all components with: +- Rotating file logs (prevents disk space issues) +- Separate error log for quick issue identification +- JSON-structured logs for easy parsing +- Console output for development +""" + +import json +import logging +import logging.handlers +from pathlib import Path +from datetime import datetime +from typing import Optional, Dict, Any + + +# Log directory +LOG_DIR = Path("logs") +LOG_DIR.mkdir(exist_ok=True) + +# Log file paths +MAIN_LOG = LOG_DIR / "ajarbot.log" +ERROR_LOG = LOG_DIR / "errors.log" +TOOL_LOG = LOG_DIR / "tools.log" + + +class JSONFormatter(logging.Formatter): + """Format log records as JSON for easy parsing.""" + + def format(self, record: logging.LogRecord) -> str: + """Format log record as JSON.""" + log_data = { + "timestamp": datetime.utcnow().isoformat() + "Z", + "level": record.levelname, + "logger": record.name, + "message": record.getMessage(), + "module": record.module, + "function": record.funcName, + "line": record.lineno, + } + + # Add exception info if present + if record.exc_info: + log_data["exception"] = self.formatException(record.exc_info) + + # Add extra fields if present + if hasattr(record, "extra_data"): + log_data["extra"] = record.extra_data + + return json.dumps(log_data) + + +class StructuredLogger: + """Wrapper for structured logging with context.""" + + def __init__(self, name: str): + """Initialize structured logger.""" + self.logger = logging.getLogger(name) + self._setup_handlers() + + def _setup_handlers(self): + """Set up log handlers if not already configured.""" + if self.logger.handlers: + return # Already configured + + self.logger.setLevel(logging.DEBUG) + + # Console handler (human-readable) + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.INFO) + console_formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S" + ) + console_handler.setFormatter(console_formatter) + + # Main log file handler (JSON, rotating) + main_handler = logging.handlers.RotatingFileHandler( + MAIN_LOG, + maxBytes=10 * 1024 * 1024, # 10MB + backupCount=5, + encoding="utf-8" + ) + main_handler.setLevel(logging.DEBUG) + main_handler.setFormatter(JSONFormatter()) + + # Error log file handler (JSON, errors only) + error_handler = logging.handlers.RotatingFileHandler( + ERROR_LOG, + maxBytes=5 * 1024 * 1024, # 5MB + backupCount=3, + encoding="utf-8" + ) + error_handler.setLevel(logging.ERROR) + error_handler.setFormatter(JSONFormatter()) + + # Add handlers + self.logger.addHandler(console_handler) + self.logger.addHandler(main_handler) + self.logger.addHandler(error_handler) + + def log(self, level: int, message: str, extra: Optional[Dict[str, Any]] = None): + """Log message with optional extra context.""" + if extra: + self.logger.log(level, message, extra={"extra_data": extra}) + else: + self.logger.log(level, message) + + def debug(self, message: str, **kwargs): + """Log debug message.""" + self.log(logging.DEBUG, message, kwargs if kwargs else None) + + def info(self, message: str, **kwargs): + """Log info message.""" + self.log(logging.INFO, message, kwargs if kwargs else None) + + def warning(self, message: str, **kwargs): + """Log warning message.""" + self.log(logging.WARNING, message, kwargs if kwargs else None) + + def error(self, message: str, exc_info: bool = False, **kwargs): + """Log error message.""" + self.logger.error( + message, + exc_info=exc_info, + extra={"extra_data": kwargs} if kwargs else None + ) + + def critical(self, message: str, exc_info: bool = False, **kwargs): + """Log critical message.""" + self.logger.critical( + message, + exc_info=exc_info, + extra={"extra_data": kwargs} if kwargs else None + ) + + +class ToolLogger(StructuredLogger): + """Specialized logger for tool execution tracking.""" + + def __init__(self): + """Initialize tool logger with separate log file.""" + super().__init__("tools") + + # Add specialized tool log handler + tool_handler = logging.handlers.RotatingFileHandler( + TOOL_LOG, + maxBytes=10 * 1024 * 1024, # 10MB + backupCount=3, + encoding="utf-8" + ) + tool_handler.setLevel(logging.INFO) + tool_handler.setFormatter(JSONFormatter()) + self.logger.addHandler(tool_handler) + + def log_tool_call( + self, + tool_name: str, + inputs: Dict[str, Any], + success: bool, + result: Optional[str] = None, + error: Optional[str] = None, + duration_ms: Optional[float] = None + ): + """Log tool execution with structured data.""" + log_data = { + "tool_name": tool_name, + "inputs": inputs, + "success": success, + "duration_ms": duration_ms, + } + + if success: + log_data["result_length"] = len(result) if result else 0 + self.info(f"Tool executed: {tool_name}", **log_data) + else: + log_data["error"] = error + self.error(f"Tool failed: {tool_name}", **log_data) + + +# Global logger instances +def get_logger(name: str) -> StructuredLogger: + """Get or create a structured logger.""" + return StructuredLogger(name) + + +def get_tool_logger() -> ToolLogger: + """Get the tool execution logger.""" + return ToolLogger() + + +# Configure root logger +logging.basicConfig( + level=logging.WARNING, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" +) + +# Suppress noisy third-party loggers +logging.getLogger("anthropic").setLevel(logging.WARNING) +logging.getLogger("httpx").setLevel(logging.WARNING) +logging.getLogger("httpcore").setLevel(logging.WARNING) diff --git a/tools.py b/tools.py index fcdeba6..ef4b6b8 100644 --- a/tools.py +++ b/tools.py @@ -341,6 +341,12 @@ TOOL_DEFINITIONS = [ def execute_tool(tool_name: str, tool_input: Dict[str, Any], healing_system: Any = None) -> str: """Execute a tool and return the result as a string.""" + import time + from logging_config import get_tool_logger + + logger = get_tool_logger() + start_time = time.time() + try: # MCP tools (zettelkasten + web_fetch) - route to mcp_tools.py MCP_TOOLS = { @@ -375,37 +381,61 @@ def execute_tool(tool_name: str, tool_input: Dict[str, Any], healing_system: Any # Convert result to string if needed if isinstance(result, dict): if "error" in result: - return f"Error: {result['error']}" + error_msg = f"Error: {result['error']}" + duration_ms = (time.time() - start_time) * 1000 + logger.log_tool_call( + tool_name=tool_name, + inputs=tool_input, + success=False, + error=error_msg, + duration_ms=duration_ms + ) + return error_msg elif "content" in result: - return result["content"] + result_str = result["content"] else: - return str(result) - return str(result) + result_str = str(result) + else: + result_str = str(result) + + # Log successful execution + duration_ms = (time.time() - start_time) * 1000 + logger.log_tool_call( + tool_name=tool_name, + inputs=tool_input, + success=True, + result=result_str, + duration_ms=duration_ms + ) + return result_str # File tools (traditional handlers - kept for backward compatibility) + # Execute traditional tool and capture result + result_str = None + if tool_name == "read_file": - return _read_file(tool_input["file_path"]) + result_str = _read_file(tool_input["file_path"]) elif tool_name == "write_file": - return _write_file(tool_input["file_path"], tool_input["content"]) + result_str = _write_file(tool_input["file_path"], tool_input["content"]) elif tool_name == "edit_file": - return _edit_file( + result_str = _edit_file( tool_input["file_path"], tool_input["old_text"], tool_input["new_text"], ) elif tool_name == "list_directory": path = tool_input.get("path", ".") - return _list_directory(path) + result_str = _list_directory(path) elif tool_name == "run_command": command = tool_input["command"] working_dir = tool_input.get("working_dir", ".") - return _run_command(command, working_dir) + result_str = _run_command(command, working_dir) elif tool_name == "get_weather": location = tool_input.get("location", "Phoenix, US") - return _get_weather(location) + result_str = _get_weather(location) # Gmail tools elif tool_name == "send_email": - return _send_email( + result_str = _send_email( to=tool_input["to"], subject=tool_input["subject"], body=tool_input["body"], @@ -413,25 +443,25 @@ def execute_tool(tool_name: str, tool_input: Dict[str, Any], healing_system: Any reply_to_message_id=tool_input.get("reply_to_message_id"), ) elif tool_name == "read_emails": - return _read_emails( + result_str = _read_emails( query=tool_input.get("query", ""), max_results=tool_input.get("max_results", 10), include_body=tool_input.get("include_body", False), ) elif tool_name == "get_email": - return _get_email( + result_str = _get_email( message_id=tool_input["message_id"], format_type=tool_input.get("format", "text"), ) # Calendar tools elif tool_name == "read_calendar": - return _read_calendar( + result_str = _read_calendar( days_ahead=tool_input.get("days_ahead", 7), calendar_id=tool_input.get("calendar_id", "primary"), max_results=tool_input.get("max_results", 20), ) elif tool_name == "create_calendar_event": - return _create_calendar_event( + result_str = _create_calendar_event( summary=tool_input["summary"], start_time=tool_input["start_time"], end_time=tool_input["end_time"], @@ -440,13 +470,13 @@ def execute_tool(tool_name: str, tool_input: Dict[str, Any], healing_system: Any calendar_id=tool_input.get("calendar_id", "primary"), ) elif tool_name == "search_calendar": - return _search_calendar( + result_str = _search_calendar( query=tool_input["query"], calendar_id=tool_input.get("calendar_id", "primary"), ) # Contacts tools elif tool_name == "create_contact": - return _create_contact( + result_str = _create_contact( given_name=tool_input["given_name"], family_name=tool_input.get("family_name", ""), email=tool_input.get("email", ""), @@ -454,17 +484,51 @@ def execute_tool(tool_name: str, tool_input: Dict[str, Any], healing_system: Any notes=tool_input.get("notes"), ) elif tool_name == "list_contacts": - return _list_contacts( + result_str = _list_contacts( max_results=tool_input.get("max_results", 100), query=tool_input.get("query"), ) elif tool_name == "get_contact": - return _get_contact( + result_str = _get_contact( resource_name=tool_input["resource_name"], ) + + # Log successful traditional tool execution + if result_str is not None: + duration_ms = (time.time() - start_time) * 1000 + logger.log_tool_call( + tool_name=tool_name, + inputs=tool_input, + success=True, + result=result_str, + duration_ms=duration_ms + ) + return result_str else: - return f"Error: Unknown tool '{tool_name}'" + duration_ms = (time.time() - start_time) * 1000 + error_msg = f"Error: Unknown tool '{tool_name}'" + logger.log_tool_call( + tool_name=tool_name, + inputs=tool_input, + success=False, + error=error_msg, + duration_ms=duration_ms + ) + return error_msg except Exception as e: + duration_ms = (time.time() - start_time) * 1000 + error_msg = str(e) + + # Log the error + logger.log_tool_call( + tool_name=tool_name, + inputs=tool_input, + success=False, + error=error_msg, + duration_ms=duration_ms + ) + + # Capture in healing system if available if healing_system: healing_system.capture_error( error=e, @@ -472,7 +536,7 @@ def execute_tool(tool_name: str, tool_input: Dict[str, Any], healing_system: Any intent=f"Executing {tool_name} tool", context={"tool_name": tool_name, "input": tool_input}, ) - return f"Error executing {tool_name}: {str(e)}" + return f"Error executing {tool_name}: {error_msg}" # Maximum characters of tool output to return (prevents token explosion)