feat: RSO observation system, child safety, Discord adapter, Telegram watchdog, email attachments

Core agent improvements:
- RSO (Relevance Scoring & Observation) system: interaction_logger, memory_scorer, signal_detector
- Memory access logging (memory_access_log table) for relevance scoring; high-signal turn detection
- Rich conversation storage for notable turns; compact_conversation truncates long user messages
- Task-type classifier (query/action/analysis/creative) for observation tagging
- Nested sub-agent visibility: deep delegations now register against the main agent's manager

Child safety (Gabriel profile):
- child_safety.py: filtering, audit logging, prompt constants for restricted sessions
- .kiro/specs/child-safety-profile: requirements, design, tasks specs
- GABRIEL_BOT_PROPOSAL.md: initial proposal doc
- Reduced context window (10 msgs) and tutor-mode identity for restricted users

Telegram adapter:
- Polling watchdog: auto-restarts updater if polling drops unexpectedly
- get_me() with exponential-backoff retry on NetworkError at startup
- Correct stop() ordering: signal watchdog before cancelling tasks

Email / Gmail:
- send_email: supports file attachments (attachments list param)
- get_email: surfaces attachment metadata in response

Scheduled tasks / weather:
- Remove OpenWeatherMap API calls from morning-weather task; use wttr.in exclusively
- New scheduled tasks and scheduler state persistence

Discord:
- adapters/discord/__init__.py scaffold
- discord-plugin: MCP plugin for Claude Code Discord integration (server.ts, skills, config)

Infrastructure:
- n8n workflow exports (garvis_webhook, content_pipeline variants)
- memory_workspace: context, homelab-repo-updates, weekly observation summaries, error logs
- UCS C240 migration plan doc
- requirements.txt: new deps
- .claude/settings.json, fix_hooks.py: hook/permission tuning
This commit is contained in:
2026-04-23 07:54:01 -06:00
parent 1232490c3b
commit 916f86725d
70 changed files with 10945 additions and 187 deletions

1
observation/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""Observation layer for RSO (Reflective Self-Optimization)."""

View File

@@ -0,0 +1,111 @@
"""
Interaction Logger — JSONL-based observation log for RSO Phase 1.
Writes are performed on daemon background threads so logging never
blocks response delivery. All log files live under:
memory_workspace/observation/logs/YYYY-MM-DD.jsonl
memory_workspace/observation/errors/YYYY-MM-DD.jsonl
Sub-agents MUST NOT instantiate this class. Only the main Agent
(is_sub_agent=False) creates and uses an InteractionLogger.
"""
import json
import threading
import time
from datetime import date
from datetime import datetime
from datetime import timezone
from pathlib import Path
from typing import Any
from typing import Dict
from typing import Optional
class InteractionLogger:
    """Thread-safe, non-blocking JSONL interaction logger.

    Every record is serialised to a JSON line on the calling thread and then
    appended to disk by a short-lived daemon thread, so file I/O never delays
    the caller.  Serialising up front means the caller may freely mutate the
    dict after the call returns without racing the background write.

    Appends are guarded by a class-level lock: concurrent writer threads (and
    multiple logger instances in the same process) cannot interleave partial
    lines, even when a record is larger than the file object's buffer.
    """

    # Shared across instances on purpose: two loggers pointed at the same
    # workspace write to the same files.
    _write_lock = threading.Lock()

    def __init__(self, workspace_dir: Path) -> None:
        """Create ``observation/{logs,errors,summaries}`` under *workspace_dir*."""
        self._base = Path(workspace_dir) / "observation"
        self._logs_dir = self._base / "logs"
        self._errors_dir = self._base / "errors"
        self._summaries_dir = self._base / "summaries"

        # Create directories eagerly — they must exist before the first
        # background write fires.
        for d in (self._logs_dir, self._errors_dir, self._summaries_dir):
            d.mkdir(parents=True, exist_ok=True)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def log_interaction(self, entry: Dict[str, Any]) -> None:
        """Append an interaction entry to today's JSONL log (non-blocking)."""
        path = self._logs_dir / f"{date.today().isoformat()}.jsonl"
        self._fire_and_forget(path, entry)

    def log_error(self, entry: Dict[str, Any]) -> None:
        """Append a structured error entry to today's error JSONL (non-blocking)."""
        path = self._errors_dir / f"{date.today().isoformat()}.jsonl"
        self._fire_and_forget(path, entry)

    def update_signal(
        self,
        interaction_id: str,
        signal_dict: Dict[str, Any],
    ) -> None:
        """Append a signal-patch record referencing a prior interaction.

        Rather than mutating the original record (which would require a
        read-rewrite that is neither atomic nor safe under concurrent
        access), we append a lightweight patch record.  The analysis
        layer merges patches when it reads the log.

        Because each record is written by its own thread, a patch is not
        guaranteed to land after the record it refers to; readers must match
        on ``interaction_id`` rather than on file order.
        """
        patch = {
            "record_type": "signal_patch",
            "interaction_id": interaction_id,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "signal": signal_dict,
        }
        self.log_interaction(patch)

    def cleanup_old_logs(self, retention_days: int = 90) -> None:
        """Delete JSONL files whose mtime is older than *retention_days*.

        Runs synchronously in the calling thread and only touches the logs
        and errors directories (summaries are left alone).  Failures on
        individual files are reported on stdout and do not abort the sweep.
        """
        cutoff = time.time() - (retention_days * 86400)
        for directory in (self._logs_dir, self._errors_dir):
            for f in directory.glob("*.jsonl"):
                try:
                    if f.stat().st_mtime < cutoff:
                        f.unlink()
                        print(f"[ObsLogger] Deleted old log: {f.name}")
                except OSError as e:
                    print(f"[ObsLogger] Could not delete {f}: {e}")

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _fire_and_forget(self, path: Path, record: Dict[str, Any]) -> None:
        """Snapshot *record* as JSON now, then append it from a daemon thread."""
        line = self._serialize(path, record)
        if line is None:
            return
        writer = threading.Thread(
            target=self._write_line,
            args=(path, line),
            name="ObsLogger-writer",
            daemon=True,
        )
        writer.start()

    @staticmethod
    def _serialize(path: Path, record: Dict[str, Any]) -> Optional[str]:
        """Return *record* as one JSON line, or None if it cannot be encoded.

        Non-JSON values are stringified via ``default=str``.  *path* is used
        only to label the console message on failure.
        """
        try:
            return json.dumps(record, default=str, ensure_ascii=False)
        except Exception as e:
            # Last-resort console output — never raises back to caller.
            print(f"[ObsLogger] Write failed ({path.name}): {e}")
            return None

    @staticmethod
    def _write_line(path: Path, line: str) -> None:
        """Append *line* plus a newline to *path* while holding the write lock."""
        try:
            with InteractionLogger._write_lock, open(
                path, "a", encoding="utf-8"
            ) as fh:
                fh.write(line + "\n")
        except Exception as e:
            # Last-resort console output — never raises back to caller.
            print(f"[ObsLogger] Write failed ({path.name}): {e}")

    @staticmethod
    def _append_jsonl(path: Path, record: Dict[str, Any]) -> None:
        """Serialise *record* and append it to *path* synchronously.

        Kept for callers that used the original helper directly; it never
        raises, and failures are printed to stdout.
        """
        line = InteractionLogger._serialize(path, record)
        if line is not None:
            InteractionLogger._write_line(path, line)

View File

@@ -0,0 +1,348 @@
"""
Memory Relevance Scorer — RSO Phase 2.
Scores every indexed memory file using the formula from the RSO spec:
Score = (access_frequency × 3) + (influence_rate × 5)
- (age_days × 0.1) - (staleness_risk × 2)
Tiers:
core (>8) : High-value, actively referenced — keep at top of retrieval
active (3-8) : In-use memory — maintain as-is
archive (0-3) : Low-signal, old, or redundant — candidate for archival
stale (<0) : High staleness risk, never accessed — recommend archival
Access frequency is tracked via the memory_access_log table (added to
memory_index.db in Phase 2). On first run there is no history; scores will
be age + staleness only. Frequency builds from the next agent session onward.
Output: memory_workspace/observation/summaries/memory-scores-YYYY-MM-DD.json
"""
import json
import re
import sqlite3
import threading
import time
from datetime import date, datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
# ---------------------------------------------------------------------------
# Staleness heuristic patterns
# ---------------------------------------------------------------------------
# Dotted-quad lookalikes.  Octets are not range-checked, so four-part version
# strings such as "1.2.3.4" match as well.
_RE_IP = re.compile(r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b")

# Whole-word mentions of secrets.  Plural forms are accepted explicitly: the
# trailing \b would otherwise reject "credentials", "passwords", "API keys",
# "tokens" and "secrets".
_RE_CREDENTIALS = re.compile(
    r"\b(passwords?|passwd|credentials?|api[_\s\-]?keys?|tokens?|secrets?)\b",
    re.IGNORECASE,
)

# Whole-word service/feature state words.
_RE_STATUS = re.compile(
    r"\b(running|stopped|active|inactive|enabled|disabled|up|down)\b",
    re.IGNORECASE,
)

# "v1.2", "v1.2.3" or "version 4"-style mentions.
_RE_VERSION = re.compile(r"v\d+\.\d+(?:\.\d+)?|\bversion\s+\d", re.IGNORECASE)

# ISO dates (YYYY-MM-DD) for years 1900-2099; groups are (year, month, day).
# The year is not pinned to a single decade, so dates before 2020 or from
# 2030 onward are still seen.  The lookarounds stop a match from starting or
# ending inside a longer run of digits.
_RE_DATE = re.compile(r"(?<!\d)((?:19|20)\d{2})-(\d{2})-(\d{2})(?!\d)")

# Daily-log filenames ending in YYYY-MM-DD.md; groups are (year, month, day).
_RE_DAILY_NAME = re.compile(r"(\d{4})-(\d{2})-(\d{2})\.md$")
class MemoryRelevanceScorer:
    """Score all indexed memory files for the weekly reflection agent.

    Reads the ``files`` and ``memory_access_log`` tables of
    ``<workspace>/memory_index.db`` and writes JSON reports under
    ``<workspace>/observation/summaries``.
    """

    def __init__(self, workspace_dir: str) -> None:
        """Remember the workspace paths and ensure the summaries dir exists."""
        self._workspace = Path(workspace_dir)
        self._db_path = self._workspace / "memory_index.db"
        self._summaries_dir = self._workspace / "observation" / "summaries"
        self._summaries_dir.mkdir(parents=True, exist_ok=True)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def score_all(self, lookback_days: int = 30) -> Dict[str, Any]:
        """Score every indexed memory file.  Returns full report dict.

        Cold-start mode: when the access log is empty (no history yet), the
        full spec formula degrades everything to stale — useless output.
        In cold-start, a baseline of 5.0 is used so age and staleness can
        still differentiate files while access data accumulates.

        Full formula (once data exists):
            score = (access × 3) + (influence × 5) - (age × 0.1) - (staleness × 2)
        Cold-start formula:
            score = 5.0 - (age × 0.05) - (staleness × 2)

        If the index database does not exist, an empty report is returned
        with an explanatory ``note``.  The database is deliberately not
        opened in that case, because ``sqlite3.connect`` would create an
        empty file at that path.

        Args:
            lookback_days: Only accesses within this many days are counted.

        Returns:
            Report dict with ``entries`` sorted by ascending score, per-tier
            counts under ``summary`` and ``archive_recommendations``.
        """
        if not self._db_path.is_file():
            return self._build_report(
                [],
                lookback_days,
                cold_start=True,
                note=(
                    f"INDEX MISSING: {self._db_path.name} not found in "
                    f"{self._workspace}; nothing was scored."
                ),
            )

        cutoff_ms = int((time.time() - lookback_days * 86400) * 1000)
        today = date.today()
        db = sqlite3.connect(str(self._db_path), check_same_thread=False)
        db.row_factory = sqlite3.Row
        try:
            files = db.execute(
                "SELECT path, mtime, size FROM files ORDER BY mtime ASC"
            ).fetchall()
            # Determine cold-start: any accesses at all in the lookback window?
            cold_start = self._total_access_count(db, cutoff_ms) == 0
            scored: List[Dict[str, Any]] = [
                self._score_file(db, row, cutoff_ms, today, cold_start)
                for row in files
            ]
        finally:
            db.close()

        # Lowest scores first, so archive candidates lead the report.
        scored.sort(key=lambda x: x["score"])

        note: Optional[str] = None
        if cold_start:
            note = (
                "COLD START: no access history yet. Scores use age+staleness only "
                "(baseline 5.0, age penalty 0.05/day). Full formula activates once "
                "memory_access_log accumulates data from live sessions."
            )
        return self._build_report(
            scored, lookback_days, cold_start=cold_start, note=note
        )

    def write_report(self, lookback_days: int = 30) -> Path:
        """Generate and write JSON report; returns the output path."""
        report = self.score_all(lookback_days)
        today = datetime.now().strftime("%Y-%m-%d")
        out_path = self._summaries_dir / f"memory-scores-{today}.json"
        out_path.write_text(
            json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8"
        )
        print(
            f"[MemoryScorer] Report written -> {out_path.name} "
            f"({report['files_scored']} files, "
            f"{report['summary']['archive_candidates']} archive candidates, "
            f"{report['summary']['stale_candidates']} stale)"
        )
        return out_path

    def print_summary(self, lookback_days: int = 30) -> None:
        """Print a human-readable summary table to stdout.

        Re-runs ``score_all``; at most the first ten archive candidates are
        listed individually.
        """
        report = self.score_all(lookback_days)
        s = report["summary"]
        sep = "-" * 60
        print(
            f"\n{sep}\n"
            f"Memory Relevance Report ({report['generated_at'][:10]})\n"
            f"Lookback: {lookback_days}d | Files scored: {report['files_scored']}\n"
            f"{sep}\n"
            f" Core (>8) : {s['core_memory']:3d}\n"
            f" Active (3-8) : {s['active_memory']:3d}\n"
            f" Archive (0-3) : {s['archive_candidates']:3d}\n"
            f" Stale (<0) : {s['stale_candidates']:3d}\n"
            f"{sep}"
        )
        if report.get("note"):
            print(f" NOTE: {report['note']}")
        archive = report["archive_recommendations"]
        if archive:
            print("\n Archive candidates (age >=30d, score <3):")
            for e in archive[:10]:
                flags = ", ".join(e["staleness_flags"]) or "none"
                print(
                    f" {e['path']:<40} "
                    f"score={e['score']:>6.2f} "
                    f"age={e['age_days']:>5.0f}d "
                    f"flags=[{flags}]"
                )
            if len(archive) > 10:
                print(f" ... and {len(archive) - 10} more")
        print()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _score_file(
        self,
        db: sqlite3.Connection,
        row: sqlite3.Row,
        cutoff_ms: int,
        today: date,
        cold_start: bool,
    ) -> Dict[str, Any]:
        """Build the report entry for one row of the ``files`` table."""
        path = row["path"]
        content = self._read_file(path)
        access_count = self._access_count(db, path, cutoff_ms)
        age_days = self._age_days(path, row["mtime"], today)
        staleness_risk = self._staleness_risk(content, today)
        influence_rate = self._influence_proxy(access_count)

        if cold_start:
            # Gentler age decay (0.05 instead of 0.1); baseline of 5
            # so files don't all collapse to stale before we have data.
            score = 5.0 - (age_days * 0.05) - (staleness_risk * 2)
        else:
            score = (
                (access_count * 3)
                + (influence_rate * 5)
                - (age_days * 0.1)
                - (staleness_risk * 2)
            )

        tier = _tier(score)
        return {
            "path": path,
            "score": round(score, 2),
            "tier": tier,
            "age_days": round(age_days, 1),
            "access_frequency": access_count,
            "influence_rate": round(influence_rate, 2),
            "staleness_risk": round(staleness_risk, 2),
            "staleness_flags": self._staleness_flags(content),
            "recommendation": _recommendation(tier, age_days),
            "cold_start": cold_start,
        }

    @staticmethod
    def _build_report(
        scored: List[Dict[str, Any]],
        lookback_days: int,
        *,
        cold_start: bool,
        note: Optional[str],
    ) -> Dict[str, Any]:
        """Wrap already-sorted entries in the report envelope with tier counts."""
        tier_counts = {"core": 0, "active": 0, "archive": 0, "stale": 0}
        for e in scored:
            tier_counts[e["tier"]] = tier_counts.get(e["tier"], 0) + 1
        return {
            "generated_at": datetime.now().astimezone().isoformat(),
            "lookback_days": lookback_days,
            "cold_start": cold_start,
            "files_scored": len(scored),
            "note": note,
            "summary": {
                "core_memory": tier_counts["core"],
                "active_memory": tier_counts["active"],
                "archive_candidates": tier_counts["archive"],
                "stale_candidates": tier_counts["stale"],
            },
            "archive_recommendations": [
                e for e in scored
                if e["recommendation"] == "archive" and e["age_days"] >= 30
            ],
            "entries": scored,
        }

    def _read_file(self, rel_path: str) -> str:
        """Return the UTF-8 text of *rel_path* under the workspace, or ""."""
        try:
            return (self._workspace / rel_path).read_text(encoding="utf-8")
        except (OSError, ValueError):
            # OSError: missing or unreadable file.  ValueError covers
            # UnicodeDecodeError (non-UTF-8 content) and malformed paths.
            # Unreadable content simply contributes no staleness signal.
            return ""

    def _total_access_count(
        self, db: sqlite3.Connection, cutoff_ms: int
    ) -> int:
        """Total accesses across all paths in the lookback window."""
        try:
            row = db.execute(
                "SELECT COUNT(*) AS n FROM memory_access_log WHERE accessed_at >= ?",
                (cutoff_ms,),
            ).fetchone()
            return row["n"] if row else 0
        except sqlite3.OperationalError:
            # Treated as "no history" (e.g. the table does not exist yet).
            return 0

    def _access_count(
        self, db: sqlite3.Connection, path: str, cutoff_ms: int
    ) -> int:
        """Accesses recorded for *path* at or after *cutoff_ms*."""
        try:
            row = db.execute(
                "SELECT COUNT(*) AS n FROM memory_access_log "
                "WHERE path = ? AND accessed_at >= ?",
                (path, cutoff_ms),
            ).fetchone()
            return row["n"] if row else 0
        except sqlite3.OperationalError:
            # Table doesn't exist yet on very first run before schema migration
            return 0

    def _age_days(
        self, path: str, mtime_ms: int, today: date
    ) -> float:
        """Age in days — prefer date extracted from filename for daily logs.

        *mtime_ms* is treated as epoch milliseconds.  The result is clamped
        to zero so a future-dated filename or mtime cannot turn the age
        penalty into a score bonus.
        """
        m = _RE_DAILY_NAME.search(path)
        if m:
            try:
                file_date = date(int(m.group(1)), int(m.group(2)), int(m.group(3)))
                return float(max((today - file_date).days, 0))
            except ValueError:
                # Digits that do not form a real date: fall back to mtime.
                pass
        return max((time.time() - mtime_ms / 1000) / 86400, 0.0)

    def _staleness_risk(self, content: str, today: date) -> float:
        """0.0-3.0 staleness score from content heuristics.

        IP-like strings and credential words add 1.0 each; status words,
        version numbers and any date more than 30 days old add 0.5 each.
        The total is capped at 3.0.
        """
        score = 0.0
        if _RE_IP.search(content):
            score += 1.0
        if _RE_CREDENTIALS.search(content):
            score += 1.0
        if _RE_STATUS.search(content):
            score += 0.5
        if _RE_VERSION.search(content):
            score += 0.5
        # Past dates mentioned in content (more than 30 days ago)
        for m in _RE_DATE.finditer(content):
            try:
                mentioned = date(int(m.group(1)), int(m.group(2)), int(m.group(3)))
                if (today - mentioned).days > 30:
                    score += 0.5
                    break  # Only penalise once per file
            except ValueError:
                # Date-shaped digits that are not a valid calendar date.
                pass
        return min(score, 3.0)

    def _staleness_flags(self, content: str) -> List[str]:
        """Names of the pattern heuristics that fire on *content*.

        The old-date check used by ``_staleness_risk`` has no flag here.
        """
        flags: List[str] = []
        if _RE_IP.search(content):
            flags.append("ip_addresses")
        if _RE_CREDENTIALS.search(content):
            flags.append("credentials")
        if _RE_STATUS.search(content):
            flags.append("status_references")
        if _RE_VERSION.search(content):
            flags.append("version_numbers")
        return flags

    @staticmethod
    def _influence_proxy(access_count: int) -> float:
        """Proxy for influence rate — no real data until access log fills."""
        if access_count >= 5:
            return 0.8
        if access_count >= 2:
            return 0.5
        if access_count == 1:
            return 0.3
        return 0.0
# ---------------------------------------------------------------------------
# Pure functions
# ---------------------------------------------------------------------------
def _tier(score: float) -> str:
    """Translate a relevance score into its tier label.

    Above 8 is "core"; 3 up to and including 8 is "active"; 0 up to (but not
    including) 3 is "archive"; everything else is "stale".
    """
    # Ordered from the highest band down; the first satisfied bound wins.
    bands = (
        ("core", score > 8),
        ("active", score >= 3),
        ("archive", score >= 0),
    )
    for label, in_band in bands:
        if in_band:
            return label
    return "stale"
def _recommendation(tier: str, age_days: float) -> str:
    """Choose the action for a file from its tier and age.

    "core" and "active" files are kept.  An "archive"-tier file is only
    monitored until it is at least 60 days old.  Every other case, stale
    files included, is archived rather than deleted (Phase 3 safety rule).
    """
    if tier == "core" or tier == "active":
        return "keep"
    still_young = tier == "archive" and not age_days >= 60
    return "monitor" if still_young else "archive"
# ---------------------------------------------------------------------------
# CLI entry point
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import sys

    # An optional first CLI argument overrides the default workspace path.
    cli_args = sys.argv[1:]
    workspace_root = cli_args[0] if cli_args else "./memory_workspace"

    # Show the table first, then persist the JSON report and say where it is.
    relevance_scorer = MemoryRelevanceScorer(workspace_root)
    relevance_scorer.print_summary()
    report_path = relevance_scorer.write_report()
    print(f"Full report: {report_path}")

View File

@@ -0,0 +1,109 @@
"""
User Signal Detector — heuristic classifier for follow-up messages.
Classifies the user's next message (after Garvis responded) into one of:
"positive" — explicit praise / satisfaction
"negative" — explicit dissatisfaction / error report
"correction" — user corrects or rephrases Garvis
"refinement" — user extends the prior request
"neutral" — new topic or unclassifiable
This is intentionally heuristic. Aggregate patterns matter more than
any individual classification. The analysis layer must account for noise.
No project imports — safe to use anywhere without circular-dep risk.
"""
from typing import Optional
# ---------------------------------------------------------------------------
# Keyword tables (extend here without touching classify_signal logic)
# ---------------------------------------------------------------------------
_POSITIVE_WORDS = frozenset({
    "perfect", "great", "excellent", "exactly", "thanks", "thank",
    "awesome", "good", "nice", "nailed", "correct",
    "yes", "yep", "sure", "right", "wonderful",
    "brilliant", "fantastic", "helpful", "appreciate",
})
_NEGATIVE_WORDS = frozenset({
    "no", "nope", "wrong", "incorrect", "bad", "terrible", "awful",
    "failed", "broken", "error", "mistake", "off",
})
_CORRECTION_WORDS = frozenset({
    "actually", "wait", "sorry", "clarify",
})
_CORRECTION_PHRASES = frozenset({
    "i meant", "i mean", "what i meant", "not that",
    "let me clarify", "to clarify", "scratch that",
    "hold on", "my bad", "that's not", "not what i",
    "try again", "you missed",
})
_REFINEMENT_PHRASES = frozenset({
    "can you also", "what about", "and also", "additionally",
    "could you also", "one more", "another thing", "on top of that",
    "while you're at it", "in addition",
    "can you add", "please add", "add to that",
})

# Time threshold under which a quick reply skews toward correction.
_REPHRASE_THRESHOLD_S: float = 30.0

# Characters trimmed from both ends of each token, so "Thanks!" and "no,"
# match the keyword tables.  Punctuation inside a token (e.g. "that's") stays.
_TOKEN_STRIP_CHARS: str = ".,!?;:\"'()[]{}<>*_~`-\u2026\u2014\u2013\u2018\u201c\u201d"


def classify_signal(
    follow_up_text: str,
    time_delta_seconds: Optional[float] = None,
) -> str:
    """Classify a follow-up message as a user feedback signal.

    Matching is done on lower-cased tokens with edge punctuation removed.
    Multi-word phrases must match on whole words, so "mini meaning" does not
    trigger the "i mean" correction phrase.

    Checks run in a fixed order and the first hit wins: positive words,
    correction phrases, correction words, negative words, refinement phrases,
    then the rapid-reply heuristic.

    Args:
        follow_up_text: The user's next message after Garvis responded.
        time_delta_seconds: Seconds elapsed since the previous response.
            If provided and < 30, rapid replies without positive signals
            skew toward "correction".

    Returns:
        One of: "positive", "negative", "correction", "refinement", "neutral"
    """
    if not follow_up_text or not follow_up_text.strip():
        return "neutral"

    # Normalise typographic apostrophes so "that’s not" matches "that's not".
    text_lower = follow_up_text.lower().strip().replace("\u2019", "'")
    tokens = [
        tok
        for tok in (raw.strip(_TOKEN_STRIP_CHARS) for raw in text_lower.split())
        if tok
    ]
    words = set(tokens)
    # Space-padded so a phrase can only match on whole-word boundaries.
    padded = f" {' '.join(tokens)} "

    def _has_phrase(phrases: frozenset) -> bool:
        return any(f" {phrase} " in padded for phrase in phrases)

    # --- Explicit positive ---
    # NOTE(review): positive words win over everything else, so "not right"
    # is classified as positive — confirm this precedence is intended.
    if words & _POSITIVE_WORDS:
        return "positive"

    # --- Multi-word correction phrases (check before single words) ---
    if _has_phrase(_CORRECTION_PHRASES):
        return "correction"

    # --- Single-word correction signals ---
    if words & _CORRECTION_WORDS:
        return "correction"

    # --- Explicit negative ---
    if words & _NEGATIVE_WORDS:
        return "negative"

    # --- Refinement patterns ---
    if _has_phrase(_REFINEMENT_PHRASES):
        return "refinement"

    # --- Rapid rephrase heuristic ---
    # If the user responds very quickly and no other signal matched,
    # treat it as a soft correction (likely dissatisfied with the answer).
    if (
        time_delta_seconds is not None
        and time_delta_seconds < _REPHRASE_THRESHOLD_S
    ):
        return "correction"

    return "neutral"