"""Child safety module: filtering, audit logging, and prompt constants for restricted sessions.""" import dataclasses import json import re import threading import time from datetime import date, datetime, timezone from pathlib import Path from typing import Optional, Tuple from adapters.base import InboundMessage # Key used in InboundMessage.metadata to signal a preprocessor block to the runtime. _CS_BLOCKED_KEY = "_cs_blocked" # --- Prompt constants --- CHILD_TUTOR_IDENTITY = ( "You are a coding mentor and game development tutor. You help Gabriel — a 13-year-old " "building Roblox games in Lua — learn to code and think like a developer. You are not a " "general-purpose assistant; for this session, your entire focus is helping Gabriel build " "skills and create games." ) CHILD_MAX_CONTEXT_MESSAGES = 10 SESSION_UPDATE_INSTRUCTION = """\ At the end of this conversation, use your file write tool to update \ `memory_workspace/users/gabriel_context.md` with: - ## Active Project: what Gabriel is building (name + one sentence description) - ## Last Session (today's date): what was worked on, bugs fixed, concepts covered - ## Open Threads: anything Gabriel mentioned wanting to do next - ## Skills Introduced: cumulative list of concepts taught, with date first introduced Keep the file under 40 lines. Overwrite it completely each time.""" FIRST_RUN_BLOCK = """\ FIRST SESSION: This is Gabriel's very first message. Before answering his question, \ send a short friendly welcome (4-5 sentences max). Cover: - What you can help with: Lua, Roblox Studio, game design, coding questions - That you guide and teach rather than just hand over answers - That you'll remember his projects between sessions - Invite him to tell you what he's building (or answer if he already has) Casual and warm -- not a formal introduction. Then answer his question normally.""" CHILD_GUARDRAIL_BLOCK = """\ === CHILD SAFE MODE === You are talking to Gabriel, a 13-year-old who is learning game development and Lua scripting. Your role is educator and mentor -- not answer key. --- CONTENT RULES --- ALWAYS ENCOURAGED: - Lua scripting, Roblox Studio mechanics, game physics - Horror game design: atmosphere, enemy AI, jump scares, damage systems - Weapon mechanics IN GAMES: hitboxes, shooting mechanics, damage values, animations - General coding concepts, algorithms, creative writing, school subjects NEVER ALLOWED -- refuse politely, no explanation of why: - Real-world instructions for harming people or animals - How to build, obtain, or use actual weapons - Sexual or romantic content of any kind - Explicit language or profanity - Sharing or asking for real personal information GRAY AREA RULE: If a question mentions weapons, violence, or dangerous topics AND there is any reasonable game/educational interpretation -- assume game context and help enthusiastically. Only refuse if the request is unambiguously real-world harm with no plausible game framing. --- TEACHING APPROACH --- Your goal is to build Gabriel's skills and confidence over time, not to hand him answers. Use this approach every time: 1. ASSESS FIRST (for non-trivial questions): Before diving in, ask what he's already tried or what he thinks might work. Skip this for simple factual lookups ("what does pairs() do?"). 2. BREAK IT DOWN: Split the problem into smaller steps. Guide through one step at a time. "Let's start with just getting the bullet to appear -- we'll worry about damage after." 3. CODE + EXPLANATION always together: When you show code, explain what each meaningful part does in plain language immediately after. Never a bare code block with no context. Ask "does that make sense?" or "what do you think this line is doing?" after showing it. 4. LEAVE SOMETHING FOR HIM: After giving an example, leave one small piece for Gabriel to write himself. "I've done the shooting part -- can you add the check for ammo count?" 5. GUIDE THE DEBUG, DON'T SOLVE IT: When he shares broken code, point him toward the area with the issue rather than fixing it directly. "Look at what your variable is on the third loop -- what's it equal to at that point?" 6. CELEBRATE THE ATTEMPT: Always acknowledge what's working before addressing what isn't. "The loop structure is solid -- that's the tricky bit. Just one small fix needed here." 7. CONNECT TO PAST WORK: When a new concept resembles something covered before, say so. "This is the same idea as the enemy spawner loop -- same structure, different purpose." 8. DIRECT ANSWERS are fine for: simple factual questions, API lookups, syntax checks, "what does X do?" questions. Only apply the full teaching approach for problem-solving. 9. AI LITERACY -- teach him to use you well (weave in naturally, never lecture): - When he asks something vague, model good question structure before answering: "Just checking -- you want the damage to apply on touch, or only when the enemy attacks?" - When context runs out, explain it plainly: "I can only hold so much conversation in memory. Next session, remind me what you're building and I'll be right back up to speed." - Teach the ideal coding question format when the moment comes up naturally: "Next time: what your code does now + what you want + what you've tried = fastest answer." - Flag your assumptions so he learns to spot ambiguity: "I'm assuming this resets on respawn -- let me know if that's not what you meant." RESPONSE LENGTH: Keep responses focused. Step-by-step means one step at a time -- don't front-load everything. Short, clear, then wait for his response before continuing. TONE: Enthusiastic, encouraging, patient. Short sentences. No jargon without explanation. Talk to him like a smart friend who happens to know a lot about game dev, not like a textbook. === END CHILD SAFE MODE ===""" # --- Compiled filter patterns (once at import, not per-message) --- _HARD_BLOCK_PATTERNS = [re.compile(p, re.IGNORECASE) for p in [ r"\b(sex|porn|nude|naked|explicit)\b", r"\bhow (do i|to|can i).{0,40}(kill|hurt|stab|shoot|harm).{0,30}(myself|yourself)\b", r"\bhow (do i|to|can i).{0,40}(hurt|stab|kill|attack|beat up|harm).{0,30}(my |a )?(sister|brother|mom|dad|teacher|classmate|friend|kid|child|person|someone|people)\b", r"\b(give me|what is|find).{0,30}(address|phone number|school|location).{0,30}(of|for)\b", ]] _GAME_CONTEXT_SIGNALS = [re.compile(p, re.IGNORECASE) for p in [ r"\bin (my |the |a )?(game|roblox|studio|script|map|level|world|place)\b", r"\b(lua|roblox|studio|npc|hitbox|raycast|humanoid|workspace|basepart|tool|part)\b", r"\b(code|script|function|method|module|class|variable|loop|event|animate|tween)\b", r"\b(damage|health|respawn|kill|destroy)\b.{0,30}\b(player|npc|enemy|mob|character|humanoid)\b", r"\bhow (do i|to|can i) (make|get|set|add|create|implement|build|script)\b", ]] _CONDITIONAL_BLOCK_PATTERNS = [re.compile(p, re.IGNORECASE) for p in [ r"\bhow (do i|to|can i).{0,40}(use|wield|make|build).{0,30}(knife|gun|pistol|rifle|weapon|sword|bomb).{0,30}(hurt|harm|attack|fight|cut|stab|shoot)\b", r"\bhow (do i|to|can i).{0,40}(hurt|fight|attack|beat).{0,30}(someone|people|person|kid|child)\b", r"\b(buy|get|obtain|find).{0,30}(drugs?|weed|cocaine|meth|pills)\b", ]] _EXPLICIT_OUTPUT_PATTERNS = [re.compile(p, re.IGNORECASE) for p in [ r"\b(porn|pornography|nude|naked|explicit sex|sexual content)\b", r"\b(fuck|motherfucker|cunt)\b", r"(?:step \d+.{0,80}){3,}.{0,200}(?:how to harm|how to hurt|how to kill|how to build a (?:bomb|weapon|gun))", ]] _BLOCKED_REPLY = ( "That's not something I can help with! Want to work on your Roblox game instead? " "I'm great at scripting and game mechanics." ) _FLAGGED_REPLY = ( "I ran into a bit of a snag there. Try rephrasing, or ask me something " "about your Roblox game -- I love helping with scripts and game design!" ) class ChildSafetyConfig: """Loads and exposes the child_safety block from adapters.local.yaml.""" def __init__(self, restricted_users: list, audit_retention_days: int) -> None: self.restricted_users = [u.lower() for u in restricted_users] self.audit_retention_days = audit_retention_days @classmethod def from_yaml(cls, config_path: Path) -> Optional["ChildSafetyConfig"]: try: import yaml with open(config_path, encoding="utf-8") as f: config = yaml.safe_load(f) or {} cs = config.get("child_safety", {}) if not cs: return None return cls( restricted_users=cs.get("restricted_users", []), audit_retention_days=cs.get("audit_retention_days", 365), ) except Exception as e: print(f"[ChildSafety] Could not load config from {config_path}: {e}") return None def is_restricted(self, username: str) -> bool: return username.lower() in self.restricted_users class ChildAuditLogger: """Thread-safe, non-blocking JSONL audit logger for child user interactions. Mirrors the pattern from observation/interaction_logger.py. Writes to memory_workspace/audit/{username}/YYYY-MM-DD.jsonl. Directory is created on first write, not at init. """ def __init__(self, workspace_dir: Path) -> None: self._audit_base = Path(workspace_dir) / "audit" def log( self, username: str, platform: str, action: str, filter_stage: Optional[str], filter_reason: Optional[str], message: str, response: Optional[str], ) -> None: """Append one audit entry (non-blocking, daemon thread).""" audit_dir = self._audit_base / username path = audit_dir / f"{date.today().isoformat()}.jsonl" record = { "timestamp": datetime.now(timezone.utc).isoformat(), "username": username, "platform": platform, "action": action, "filter_stage": filter_stage, "filter_reason": filter_reason, "message": message, "response": response, } t = threading.Thread( target=self._append_jsonl, args=(audit_dir, path, record), daemon=True, ) t.start() def cleanup_old_logs(self, retention_days: int) -> None: """Delete JSONL files older than retention_days. Called at startup.""" cutoff = time.time() - (retention_days * 86400) if not self._audit_base.exists(): return for user_dir in self._audit_base.iterdir(): if not user_dir.is_dir(): continue for f in user_dir.glob("*.jsonl"): try: if f.stat().st_mtime < cutoff: f.unlink() print(f"[ChildAudit] Deleted old log: {f}") except OSError as e: print(f"[ChildAudit] Could not delete {f}: {e}") @staticmethod def _append_jsonl(audit_dir: Path, path: Path, record: dict) -> None: try: audit_dir.mkdir(parents=True, exist_ok=True) line = json.dumps(record, default=str, ensure_ascii=False) with open(path, "a", encoding="utf-8") as fh: fh.write(line + "\n") except Exception as e: print(f"[ChildAudit] Write failed ({path.name}): {e}") class ChildSafetyFilter: """Intent-pattern input/output filter for restricted child user sessions.""" def __init__(self, config: ChildSafetyConfig, audit: ChildAuditLogger) -> None: self._config = config self._audit = audit def preprocess( self, message: InboundMessage ) -> Tuple[Optional[InboundMessage], Optional[str]]: """Filter an inbound message. Returns (message, None) to pass through unchanged, or (None, reply_text) to block with a safe canned reply. """ if not self._config.is_restricted(message.username): return message, None text = message.text # Step 1: hard block — always active, no context exemption for pattern in _HARD_BLOCK_PATTERNS: if pattern.search(text): self._audit.log( username=message.username, platform=message.platform, action="blocked", filter_stage="preprocessor", filter_reason=f"hard_block:{pattern.pattern[:60]}", message=text, response=None, ) return None, _BLOCKED_REPLY # Step 2: game dev context signals exempt the message from conditional blocks has_game_context = any(p.search(text) for p in _GAME_CONTEXT_SIGNALS) # Step 3: conditional block — skipped entirely when game context is detected if not has_game_context: for pattern in _CONDITIONAL_BLOCK_PATTERNS: if pattern.search(text): self._audit.log( username=message.username, platform=message.platform, action="blocked", filter_stage="preprocessor", filter_reason=f"conditional_block:{pattern.pattern[:60]}", message=text, response=None, ) return None, _BLOCKED_REPLY # Step 4: pass through — response field filled in by postprocessor self._audit.log( username=message.username, platform=message.platform, action="allowed", filter_stage="preprocessor", filter_reason=None, message=text, response=None, ) return message, None def postprocess(self, response: str, message: InboundMessage) -> str: """Scan LLM response for explicit content; replace with safe fallback if flagged.""" if not self._config.is_restricted(message.username): return response for pattern in _EXPLICIT_OUTPUT_PATTERNS: if pattern.search(response): self._audit.log( username=message.username, platform=message.platform, action="flagged", filter_stage="postprocessor", filter_reason=f"explicit_output:{pattern.pattern[:60]}", message=message.text, response=response, ) return _FLAGGED_REPLY self._audit.log( username=message.username, platform=message.platform, action="allowed", filter_stage="postprocessor", filter_reason=None, message=message.text, response=response, ) return response def preprocess_adapter(self, message: InboundMessage) -> InboundMessage: """Runtime-compatible wrapper: encodes a block signal in message metadata.""" result_msg, reply_text = self.preprocess(message) if result_msg is None: # Signal to runtime: skip agent, deliver reply_text directly new_meta = {**message.metadata, _CS_BLOCKED_KEY: reply_text} return dataclasses.replace(message, metadata=new_meta) return result_msg def postprocess_adapter(self, response: str, message: InboundMessage) -> str: """Runtime-compatible wrapper: skips LLM scan if message was already blocked.""" if message.metadata.get(_CS_BLOCKED_KEY): return response # response is already the canned block reply return self.postprocess(response, message)