feat: RSO observation system, child safety, Discord adapter, Telegram watchdog, email attachments

Core agent improvements: - RSO (Reflective Self-Optimization) system: interaction_logger, memory_scorer, signal_detector - Memory access logging (memory_access_log table) for relevance scoring; high-signal turn detection - Rich conversation storage for notable turns; compact_conversation truncates long user messages - Task-type classifier (query/action/analysis/creative) for observation tagging - Nested sub-agent visibility: deep delegations now register against the main agent's manager Child safety (Gabriel profile): - child_safety.py: filtering, audit logging, prompt constants for restricted sessions - .kiro/specs/child-safety-profile: requirements, design, tasks specs - GABRIEL_BOT_PROPOSAL.md: initial proposal doc - Reduced context window (10 msgs) and tutor-mode identity for restricted users Telegram adapter: - Polling watchdog: auto-restarts updater if polling drops unexpectedly - get_me() with exponential-backoff retry on NetworkError at startup - Correct stop() ordering: signal watchdog before cancelling tasks Email / Gmail: - send_email: supports file attachments (attachments list param) - get_email: surfaces attachment metadata in response Scheduled tasks / weather: - Remove OpenWeatherMap API calls from morning-weather task; use wttr.in exclusively - New scheduled tasks and scheduler state persistence Discord: - adapters/discord/__init__.py scaffold - discord-plugin: MCP plugin for Claude Code Discord integration (server.ts, skills, config) Infrastructure: - n8n workflow exports (garvis_webhook, content_pipeline variants) - memory_workspace: context, homelab-repo-updates, weekly observation summaries, error logs - UCS C240 migration plan doc - requirements.txt: new deps - .claude/settings.json, fix_hooks.py: hook/permission tuning
2026-04-23 07:54:01 -06:00
parent 1232490c3b
commit 916f86725d
70 changed files with 10945 additions and 187 deletions
--- a/observation/__init__.py
+++ b/observation/__init__.py
@@ -0,0 +1 @@
+"""Observation layer for RSO (Reflective Self-Optimization)."""
--- a/observation/interaction_logger.py
+++ b/observation/interaction_logger.py
@@ -0,0 +1,111 @@
+"""
+Interaction Logger — JSONL-based observation log for RSO Phase 1.
+
+Writes are performed on daemon background threads so logging never
+blocks response delivery.  All log files live under:
+
+    memory_workspace/observation/logs/YYYY-MM-DD.jsonl
+    memory_workspace/observation/errors/YYYY-MM-DD.jsonl
+
+Sub-agents MUST NOT instantiate this class.  Only the main Agent
+(is_sub_agent=False) creates and uses an InteractionLogger.
+"""
+
+import json
+import threading
+import time
+from datetime import date
+from datetime import datetime
+from datetime import timezone
+from pathlib import Path
+from typing import Any
+from typing import Dict
+from typing import Optional
+
+
+class InteractionLogger:
+    """Thread-safe, non-blocking JSONL interaction logger.
+
+    Each ``log_*`` call hands its record to a short-lived daemon thread,
+    so the caller never waits on disk I/O.  The file appends themselves
+    are serialised by a class-wide lock, so two writer threads can never
+    interleave partial lines in the same file.
+
+    Records are not guaranteed to land in call order: every write runs
+    on its own thread and whichever thread takes the lock first writes
+    first.
+    """
+
+    # Class-level so that every instance shares it -- two loggers built
+    # on the same workspace write to the same files.
+    _write_lock = threading.Lock()
+
+    def __init__(self, workspace_dir: Path) -> None:
+        """Resolve the observation directories and create them.
+
+        Args:
+            workspace_dir: Workspace root; everything is stored under
+                ``<workspace_dir>/observation/``.
+        """
+        self._base = Path(workspace_dir) / "observation"
+        self._logs_dir = self._base / "logs"
+        self._errors_dir = self._base / "errors"
+        self._summaries_dir = self._base / "summaries"
+
+        # Create directories eagerly — they must exist before the first
+        # background write fires.
+        for d in (self._logs_dir, self._errors_dir, self._summaries_dir):
+            d.mkdir(parents=True, exist_ok=True)
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    def log_interaction(self, entry: Dict[str, Any]) -> None:
+        """Append an interaction entry to today's JSONL log (non-blocking).
+
+        The target file is chosen from the local date at call time, not
+        at the moment the background thread actually writes.
+        """
+        path = self._logs_dir / f"{date.today().isoformat()}.jsonl"
+        self._fire_and_forget(path, entry)
+
+    def log_error(self, entry: Dict[str, Any]) -> None:
+        """Append a structured error entry to today's error JSONL (non-blocking)."""
+        path = self._errors_dir / f"{date.today().isoformat()}.jsonl"
+        self._fire_and_forget(path, entry)
+
+    def update_signal(
+        self,
+        interaction_id: str,
+        signal_dict: Dict[str, Any],
+    ) -> None:
+        """Append a signal-patch record referencing a prior interaction.
+
+        Rather than mutating the original record (which would require a
+        read-rewrite that is neither atomic nor safe under concurrent
+        access), we append a lightweight patch record.  The analysis
+        layer merges patches when it reads the log.
+
+        Args:
+            interaction_id: Identifier of the interaction being patched.
+            signal_dict: Signal payload stored under the ``signal`` key.
+        """
+        patch = {
+            "record_type": "signal_patch",
+            "interaction_id": interaction_id,
+            "timestamp": datetime.now(timezone.utc).isoformat(),
+            "signal": signal_dict,
+        }
+        self.log_interaction(patch)
+
+    def cleanup_old_logs(self, retention_days: int = 90) -> None:
+        """Delete JSONL files older than retention_days.
+
+        Called synchronously at agent startup — not on the hot path.
+        Age is judged by file modification time; only the logs and
+        errors directories are scanned.  A file that cannot be inspected
+        or removed is reported on stdout and skipped.
+        """
+        cutoff = time.time() - (retention_days * 86400)
+        for directory in (self._logs_dir, self._errors_dir):
+            for f in directory.glob("*.jsonl"):
+                try:
+                    if f.stat().st_mtime < cutoff:
+                        f.unlink()
+                        print(f"[ObsLogger] Deleted old log: {f.name}")
+                except OSError as e:
+                    print(f"[ObsLogger] Could not delete {f}: {e}")
+
+    # ------------------------------------------------------------------
+    # Internal helpers
+    # ------------------------------------------------------------------
+
+    def _fire_and_forget(self, path: Path, record: Dict[str, Any]) -> None:
+        """Launch a daemon thread to append one JSON line to *path*.
+
+        The thread is never joined; being a daemon, it will not keep the
+        interpreter alive at shutdown.
+        """
+        t = threading.Thread(
+            target=self._append_jsonl,
+            args=(path, record),
+            daemon=True,
+        )
+        t.start()
+
+    @staticmethod
+    def _append_jsonl(path: Path, record: Dict[str, Any]) -> None:
+        """Append one JSON line.  Called only from background threads.
+
+        Serialisation happens before the lock is taken; only the file
+        append runs while holding it, so writers block each other for as
+        short a time as possible.  Values json cannot encode natively
+        are stringified (``default=str``).
+        """
+        try:
+            line = json.dumps(record, default=str, ensure_ascii=False)
+            # One writer at a time: without this, concurrent threads
+            # appending large records could interleave partial lines.
+            with InteractionLogger._write_lock:
+                with open(path, "a", encoding="utf-8") as fh:
+                    fh.write(line + "\n")
+        except Exception as e:
+            # Last-resort console output — never raises back to caller.
+            print(f"[ObsLogger] Write failed ({path.name}): {e}")
--- a/observation/memory_scorer.py
+++ b/observation/memory_scorer.py
@@ -0,0 +1,348 @@
+"""
+Memory Relevance Scorer — RSO Phase 2.
+
+Scores every indexed memory file using the formula from the RSO spec:
+
+    Score = (access_frequency × 3) + (influence_rate × 5)
+            - (age_days × 0.1) - (staleness_risk × 2)
+
+Tiers:
+    core    (>8)  : High-value, actively referenced — keep at top of retrieval
+    active  (3–8) : In-use memory — maintain as-is
+    archive (0–3) : Low-signal, old, or redundant — candidate for archival
+    stale   (<0)  : High staleness risk, never accessed — recommend archival
+
+Access frequency is tracked via the memory_access_log table (added to
+memory_index.db in Phase 2). On first run there is no history; scores will
+be age + staleness only. Frequency builds from the next agent session onward.
+
+Output: memory_workspace/observation/summaries/memory-scores-YYYY-MM-DD.json
+"""
+
+import json
+import re
+import sqlite3
+import threading
+import time
+from datetime import date, datetime
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+
+# ---------------------------------------------------------------------------
+# Staleness heuristic patterns
+# ---------------------------------------------------------------------------
+
+# Dotted quad of 1-3 digit groups.  Octets are not range-checked, so
+# strings such as "999.1.1.1" match as well.
+_RE_IP = re.compile(r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b")
+# Whole-word credential vocabulary, case-insensitive.  "api key" may be
+# written with an underscore, whitespace, a hyphen, or nothing in between.
+_RE_CREDENTIALS = re.compile(
+    r"\b(password|passwd|credential|api[_\s\-]?key|token|secret)\b",
+    re.IGNORECASE,
+)
+# Whole-word service/state vocabulary, case-insensitive.
+_RE_STATUS = re.compile(
+    r"\b(running|stopped|active|inactive|enabled|disabled|up|down)\b",
+    re.IGNORECASE,
+)
+# "v1.2" / "v1.2.3" or "version <digit>", case-insensitive.  The "v" form
+# has no leading word boundary, so it also matches inside longer tokens.
+_RE_VERSION = re.compile(r"v\d+\.\d+(?:\.\d+)?|\bversion\s+\d", re.IGNORECASE)
+# ISO-style date with a year in 2020-2029; groups are (year, month, day).
+# Month/day are any two digits — calendar validity is left to the caller.
+_RE_DATE = re.compile(r"(202[0-9])-(\d{2})-(\d{2})")
+# Path ending in "YYYY-MM-DD.md"; groups are (year, month, day).
+_RE_DAILY_NAME = re.compile(r"(\d{4})-(\d{2})-(\d{2})\.md$")
+
+
+class MemoryRelevanceScorer:
+    """Score all indexed memory files for the weekly reflection agent.
+
+    Reads the ``files`` and ``memory_access_log`` tables of
+    ``<workspace>/memory_index.db`` together with the text of each
+    indexed file, and rates every file on access frequency, age and
+    content-based staleness heuristics.
+    """
+
+    def __init__(self, workspace_dir: str) -> None:
+        """Resolve workspace paths and make sure the summaries dir exists.
+
+        Args:
+            workspace_dir: Root of the memory workspace; ``memory_index.db``
+                and the indexed files are looked up relative to it.
+        """
+        self._workspace = Path(workspace_dir)
+        self._db_path = self._workspace / "memory_index.db"
+        self._summaries_dir = (
+            self._workspace / "observation" / "summaries"
+        )
+        self._summaries_dir.mkdir(parents=True, exist_ok=True)
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    def score_all(self, lookback_days: int = 30) -> Dict[str, Any]:
+        """Score every indexed memory file. Returns full report dict.
+
+        Cold-start mode: when the access log is empty (no history yet), the
+        full spec formula degrades everything to stale — useless output.
+        In cold-start, a baseline of 5.0 is used so age and staleness can
+        still differentiate files while access data accumulates.
+
+        Full formula (once data exists):
+            score = (access × 3) + (influence × 5) - (age × 0.1) - (staleness × 2)
+
+        Cold-start formula:
+            score = 5.0 - (age × 0.05) - (staleness × 2)
+
+        Args:
+            lookback_days: Only accesses logged within this many days
+                count toward access frequency.
+
+        Returns:
+            Dict with ``generated_at``, ``lookback_days``, ``cold_start``,
+            ``files_scored``, ``note``, ``summary`` (per-tier counts),
+            ``archive_recommendations`` and ``entries``.  Entries are
+            sorted by ascending score, i.e. weakest files first.
+
+        Raises:
+            sqlite3.OperationalError: If the ``files`` table cannot be
+                queried (for example, it does not exist).
+        """
+        cutoff_ms = int((time.time() - lookback_days * 86400) * 1000)
+        today = date.today()
+
+        db = sqlite3.connect(str(self._db_path), check_same_thread=False)
+        db.row_factory = sqlite3.Row
+        try:
+            files = db.execute(
+                "SELECT path, mtime, size FROM files ORDER BY mtime ASC"
+            ).fetchall()
+
+            # One grouped query for the whole window instead of one COUNT
+            # per file; paths with no accesses are simply absent.
+            access_counts = self._access_counts(db, cutoff_ms)
+            # Cold start: no accesses at all in the lookback window.
+            cold_start = not access_counts
+
+            scored: List[Dict[str, Any]] = []
+            for row in files:
+                path = row["path"]
+                mtime_ms = row["mtime"]
+
+                content = self._read_file(path)
+                access_count = access_counts.get(path, 0)
+                age_days = self._age_days(path, mtime_ms, today)
+                staleness_risk = self._staleness_risk(content, today)
+                influence_rate = self._influence_proxy(access_count)
+
+                if cold_start:
+                    # Gentler age decay (0.05 instead of 0.1); baseline of 5
+                    # so files don't all collapse to stale before we have data.
+                    score = 5.0 - (age_days * 0.05) - (staleness_risk * 2)
+                else:
+                    score = (
+                        (access_count * 3)
+                        + (influence_rate * 5)
+                        - (age_days * 0.1)
+                        - (staleness_risk * 2)
+                    )
+
+                tier = _tier(score)
+                scored.append(
+                    {
+                        "path": path,
+                        "score": round(score, 2),
+                        "tier": tier,
+                        "age_days": round(age_days, 1),
+                        "access_frequency": access_count,
+                        "influence_rate": round(influence_rate, 2),
+                        "staleness_risk": round(staleness_risk, 2),
+                        "staleness_flags": self._staleness_flags(content, today),
+                        "recommendation": _recommendation(tier, age_days),
+                        "cold_start": cold_start,
+                    }
+                )
+
+        finally:
+            db.close()
+
+        scored.sort(key=lambda x: x["score"])
+
+        tier_counts = {"core": 0, "active": 0, "archive": 0, "stale": 0}
+        for e in scored:
+            tier_counts[e["tier"]] = tier_counts.get(e["tier"], 0) + 1
+
+        note: Optional[str] = None
+        if cold_start:
+            note = (
+                "COLD START: no access history yet. Scores use age+staleness only "
+                "(baseline 5.0, age penalty 0.05/day). Full formula activates once "
+                "memory_access_log accumulates data from live sessions."
+            )
+
+        return {
+            "generated_at": datetime.now().astimezone().isoformat(),
+            "lookback_days": lookback_days,
+            "cold_start": cold_start,
+            "files_scored": len(scored),
+            "note": note,
+            "summary": {
+                "core_memory": tier_counts["core"],
+                "active_memory": tier_counts["active"],
+                "archive_candidates": tier_counts["archive"],
+                "stale_candidates": tier_counts["stale"],
+            },
+            "archive_recommendations": [
+                e for e in scored
+                if e["recommendation"] == "archive" and e["age_days"] >= 30
+            ],
+            "entries": scored,
+        }
+
+    def write_report(self, lookback_days: int = 30) -> Path:
+        """Generate and write JSON report; returns the output path.
+
+        The file is named ``memory-scores-YYYY-MM-DD.json`` after the
+        local date, so a second run on the same day overwrites the first.
+        """
+        report = self.score_all(lookback_days)
+        today = datetime.now().strftime("%Y-%m-%d")
+        out_path = self._summaries_dir / f"memory-scores-{today}.json"
+        out_path.write_text(
+            json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8"
+        )
+        print(
+            f"[MemoryScorer] Report written -> {out_path.name} "
+            f"({report['files_scored']} files, "
+            f"{report['summary']['archive_candidates']} archive candidates, "
+            f"{report['summary']['stale_candidates']} stale)"
+        )
+        return out_path
+
+    def print_summary(self, lookback_days: int = 30) -> None:
+        """Print a human-readable summary table to stdout.
+
+        Runs a fresh ``score_all`` pass; at most the ten lowest-scoring
+        archive recommendations are listed individually.
+        """
+        report = self.score_all(lookback_days)
+        s = report["summary"]
+        sep = "-" * 60
+        print(
+            f"\n{sep}\n"
+            f"Memory Relevance Report  ({report['generated_at'][:10]})\n"
+            f"Lookback: {lookback_days}d  |  Files scored: {report['files_scored']}\n"
+            f"{sep}\n"
+            f"  Core     (>8)  : {s['core_memory']:3d}\n"
+            f"  Active   (3-8) : {s['active_memory']:3d}\n"
+            f"  Archive  (0-3) : {s['archive_candidates']:3d}\n"
+            f"  Stale    (<0)  : {s['stale_candidates']:3d}\n"
+            f"{sep}"
+        )
+        if report.get("note"):
+            print(f"  NOTE: {report['note']}")
+
+        archive = report["archive_recommendations"]
+        if archive:
+            print("\n  Archive candidates (age >=30d, score <3):")
+            for e in archive[:10]:
+                flags = ", ".join(e["staleness_flags"]) or "none"
+                print(
+                    f"    {e['path']:<40}  "
+                    f"score={e['score']:>6.2f}  "
+                    f"age={e['age_days']:>5.0f}d  "
+                    f"flags=[{flags}]"
+                )
+            if len(archive) > 10:
+                print(f"    ... and {len(archive) - 10} more")
+        print()
+
+    # ------------------------------------------------------------------
+    # Internal helpers
+    # ------------------------------------------------------------------
+
+    def _read_file(self, rel_path: str) -> str:
+        """Return the file's text, or "" if it cannot be read or decoded.
+
+        Best-effort on purpose: an unreadable file is still scored, just
+        without any content-based staleness signals.
+        """
+        try:
+            return (self._workspace / rel_path).read_text(encoding="utf-8")
+        except Exception:
+            return ""
+
+    def _access_counts(
+        self, db: sqlite3.Connection, cutoff_ms: int
+    ) -> Dict[str, int]:
+        """Access count per path within the lookback window, in one query.
+
+        Returns an empty dict when nothing was accessed in the window or
+        when the memory_access_log table does not exist yet.
+        """
+        try:
+            rows = db.execute(
+                "SELECT path, COUNT(*) AS n FROM memory_access_log "
+                "WHERE accessed_at >= ? GROUP BY path",
+                (cutoff_ms,),
+            ).fetchall()
+        except sqlite3.OperationalError:
+            return {}
+        # Positional access works with or without a Row factory.
+        return {row[0]: row[1] for row in rows}
+
+    def _total_access_count(
+        self, db: sqlite3.Connection, cutoff_ms: int
+    ) -> int:
+        """Total accesses across all paths in the lookback window."""
+        try:
+            row = db.execute(
+                "SELECT COUNT(*) AS n FROM memory_access_log WHERE accessed_at >= ?",
+                (cutoff_ms,),
+            ).fetchone()
+            return row["n"] if row else 0
+        except sqlite3.OperationalError:
+            return 0
+
+    def _access_count(
+        self, db: sqlite3.Connection, path: str, cutoff_ms: int
+    ) -> int:
+        """Accesses of a single path in the lookback window."""
+        try:
+            row = db.execute(
+                "SELECT COUNT(*) AS n FROM memory_access_log "
+                "WHERE path = ? AND accessed_at >= ?",
+                (path, cutoff_ms),
+            ).fetchone()
+            return row["n"] if row else 0
+        except sqlite3.OperationalError:
+            # Table doesn't exist yet on very first run before schema migration
+            return 0
+
+    def _age_days(
+        self, path: str, mtime_ms: int, today: date
+    ) -> float:
+        """Age in days — prefer date extracted from filename for daily logs.
+
+        Falls back to the indexed mtime (milliseconds) when the name
+        carries no valid date.  The result is never negative: a future
+        filename date or mtime would otherwise turn the age penalty
+        into a score bonus.
+        """
+        m = _RE_DAILY_NAME.search(path)
+        if m:
+            try:
+                file_date = date(int(m.group(1)), int(m.group(2)), int(m.group(3)))
+            except ValueError:
+                # Digits that look like a date but are not one (e.g. month 13).
+                pass
+            else:
+                return max(0.0, float((today - file_date).days))
+        return max(0.0, (time.time() - mtime_ms / 1000) / 86400)
+
+    @staticmethod
+    def _mentions_old_date(content: str, today: date) -> bool:
+        """True if content mentions a valid date more than 30 days before today."""
+        for m in _RE_DATE.finditer(content):
+            try:
+                mentioned = date(int(m.group(1)), int(m.group(2)), int(m.group(3)))
+            except ValueError:
+                continue
+            if (today - mentioned).days > 30:
+                return True
+        return False
+
+    def _staleness_risk(self, content: str, today: date) -> float:
+        """0.0–3.0 staleness score from content heuristics.
+
+        Each signal counts once per file regardless of how often it
+        occurs; the sum is capped at 3.0.
+        """
+        score = 0.0
+        if _RE_IP.search(content):
+            score += 1.0
+        if _RE_CREDENTIALS.search(content):
+            score += 1.0
+        if _RE_STATUS.search(content):
+            score += 0.5
+        if _RE_VERSION.search(content):
+            score += 0.5
+        # Past dates mentioned in content (more than 30 days ago)
+        if self._mentions_old_date(content, today):
+            score += 0.5
+        return min(score, 3.0)
+
+    def _staleness_flags(
+        self, content: str, today: Optional[date] = None
+    ) -> List[str]:
+        """Names of the staleness signals found in content.
+
+        Reports every signal that ``_staleness_risk`` penalises,
+        including ``old_dates``, so a non-zero risk always comes with an
+        explanation.
+
+        Args:
+            content: File text to inspect.
+            today: Reference date for the old-date check; defaults to
+                the current local date.
+        """
+        flags: List[str] = []
+        if _RE_IP.search(content):
+            flags.append("ip_addresses")
+        if _RE_CREDENTIALS.search(content):
+            flags.append("credentials")
+        if _RE_STATUS.search(content):
+            flags.append("status_references")
+        if _RE_VERSION.search(content):
+            flags.append("version_numbers")
+        if self._mentions_old_date(content, today or date.today()):
+            flags.append("old_dates")
+        return flags
+
+    @staticmethod
+    def _influence_proxy(access_count: int) -> float:
+        """Proxy for influence rate — no real data until access log fills.
+
+        Step function of the access count: 0 -> 0.0, 1 -> 0.3,
+        2-4 -> 0.5, 5 or more -> 0.8.
+        """
+        if access_count >= 5:
+            return 0.8
+        if access_count >= 2:
+            return 0.5
+        if access_count == 1:
+            return 0.3
+        return 0.0
+
+
+# ---------------------------------------------------------------------------
+# Pure functions
+# ---------------------------------------------------------------------------
+
+def _tier(score: float) -> str:
+    """Map a relevance score onto its tier name.
+
+    Above 8 is "core", 3 through 8 is "active", 0 up to 3 is "archive",
+    and anything else is "stale".
+    """
+    # Ordered from the highest tier down; the first satisfied bound wins.
+    ladder = (
+        (score > 8, "core"),
+        (score >= 3, "active"),
+        (score >= 0, "archive"),
+    )
+    for reached, label in ladder:
+        if reached:
+            return label
+    return "stale"
+
+
+def _recommendation(tier: str, age_days: float) -> str:
+    """Turn a tier (plus age) into "keep", "monitor" or "archive".
+
+    Core and active files are kept.  Archive-tier files are only
+    recommended for archival once they are at least 60 days old; younger
+    ones are monitored.  Every other tier is archived.
+    """
+    if tier == "archive" and not (age_days >= 60):
+        return "monitor"
+    # stale — archive rather than delete (Phase 3 safety rule)
+    return "keep" if tier in ("core", "active") else "archive"
+
+
+# ---------------------------------------------------------------------------
+# CLI entry point
+# ---------------------------------------------------------------------------
+
+if __name__ == "__main__":
+    import sys
+
+    # An optional first CLI argument overrides the default workspace.
+    cli_args = sys.argv[1:]
+    workspace = cli_args[0] if cli_args else "./memory_workspace"
+
+    # Console summary first, then the JSON report on disk.
+    scorer = MemoryRelevanceScorer(workspace)
+    scorer.print_summary()
+    path = scorer.write_report()
+    print(f"Full report: {path}")
--- a/observation/signal_detector.py
+++ b/observation/signal_detector.py
@@ -0,0 +1,109 @@
+"""
+User Signal Detector — heuristic classifier for follow-up messages.
+
+Classifies the user's next message (after Garvis responded) into one of:
+    "positive"    — explicit praise / satisfaction
+    "negative"    — explicit dissatisfaction / error report
+    "correction"  — user corrects or rephrases Garvis
+    "refinement"  — user extends the prior request
+    "neutral"     — new topic or unclassifiable
+
+This is intentionally heuristic.  Aggregate patterns matter more than
+any individual classification.  The analysis layer must account for noise.
+
+No project imports — safe to use anywhere without circular-dep risk.
+"""
+
+from typing import Optional
+
+# ---------------------------------------------------------------------------
+# Keyword tables (extend here without touching classify_signal logic)
+# ---------------------------------------------------------------------------
+
+# Single keywords are matched against whole tokens of the message;
+# phrases are matched as substrings of the lower-cased message.
+_POSITIVE_WORDS = frozenset({
+    "perfect", "great", "excellent", "exactly", "thanks", "thank",
+    "awesome", "good", "nice", "nailed", "correct",
+    "yes", "yep", "sure", "right", "wonderful",
+    "brilliant", "fantastic", "helpful", "appreciate",
+})
+
+_NEGATIVE_WORDS = frozenset({
+    "no", "nope", "wrong", "incorrect", "bad", "terrible", "awful",
+    "failed", "broken", "error", "mistake", "off",
+})
+
+_CORRECTION_WORDS = frozenset({
+    "actually", "wait", "sorry", "clarify",
+})
+
+_CORRECTION_PHRASES = frozenset({
+    "i meant", "i mean", "what i meant", "not that",
+    "let me clarify", "to clarify", "scratch that",
+    "hold on", "my bad", "that's not", "not what i",
+    "try again", "you missed",
+})
+
+_REFINEMENT_PHRASES = frozenset({
+    "can you also", "what about", "and also", "additionally",
+    "could you also", "one more", "another thing", "on top of that",
+    "while you're at it", "in addition",
+    "can you add", "please add", "add to that",
+})
+
+# Time threshold under which a quick reply skews toward correction.
+_REPHRASE_THRESHOLD_S: float = 30.0
+
+
+def classify_signal(
+    follow_up_text: str,
+    time_delta_seconds: Optional[float] = None,
+) -> str:
+    """Classify a follow-up message as a user feedback signal.
+
+    Checks run in a fixed order and the first match wins: positive
+    keywords, correction phrases, correction keywords, negative
+    keywords, refinement phrases, then the rapid-reply heuristic.
+
+    Keywords are compared against tokens with surrounding punctuation
+    removed, so "Thanks!" and "No," count as "thanks" and "no".
+    Typographic apostrophes are normalised to ASCII so phrases such as
+    "that's not" still match text typed on mobile keyboards.
+
+    Args:
+        follow_up_text: The user's next message after Garvis responded.
+        time_delta_seconds: Seconds elapsed since the previous response.
+            If provided and < 30, rapid replies without positive signals
+            skew toward "correction".
+
+    Returns:
+        One of: "positive", "negative", "correction", "refinement", "neutral"
+    """
+    # Function-scope import: the module otherwise imports only typing.
+    import string
+
+    if not follow_up_text or not follow_up_text.strip():
+        return "neutral"
+
+    text_lower = follow_up_text.lower().strip().replace("\u2019", "'")
+    # Strip punctuation from token edges only, so inner apostrophes
+    # ("that's") survive while "great!" becomes "great".
+    words = {token.strip(string.punctuation) for token in text_lower.split()}
+
+    # --- Explicit positive ---
+    # NOTE(review): because this runs first, a positive keyword shadows
+    # any correction phrase in the same message ("that's not right").
+    if words & _POSITIVE_WORDS:
+        return "positive"
+
+    # --- Multi-word correction phrases (check before single words) ---
+    if any(phrase in text_lower for phrase in _CORRECTION_PHRASES):
+        return "correction"
+
+    # --- Single-word correction signals ---
+    if words & _CORRECTION_WORDS:
+        return "correction"
+
+    # --- Explicit negative ---
+    if words & _NEGATIVE_WORDS:
+        return "negative"
+
+    # --- Refinement patterns ---
+    if any(phrase in text_lower for phrase in _REFINEMENT_PHRASES):
+        return "refinement"
+
+    # --- Rapid rephrase heuristic ---
+    # If the user responds very quickly and no other signal matched,
+    # treat it as a soft correction (likely dissatisfied with the answer).
+    if (
+        time_delta_seconds is not None
+        and time_delta_seconds < _REPHRASE_THRESHOLD_S
+    ):
+        return "correction"
+
+    return "neutral"
				`@@ -0,0 +1 @@`
				`"""Observation layer for RSO (Reflective Self-Optimization)."""`