Add API usage tracking and dynamic task reloading

Features:
- Usage tracking system (usage_tracker.py)
  - Tracks input/output tokens per API call
  - Calculates costs with support for cache pricing
  - Stores data in usage_data.json (gitignored)
  - Integrated into llm_interface.py
- Dynamic task scheduler reloading
  - Auto-detects YAML changes every 60s
  - No restart needed for new tasks
  - reload_tasks() method for manual refresh
- Example cost tracking scheduled task
  - Daily API usage report
  - Budget tracking ($5/month target)
  - Disabled by default in scheduled_tasks.yaml

Improvements:
- Fixed tool_use/tool_result pair splitting bug (CRITICAL)
- Added thread safety to agent.chat()
- Fixed N+1 query problem in hybrid search
- Optimized database batch queries
- Added conversation history pruning (50 messages max)

Updated .gitignore:
- Exclude user profiles (memory_workspace/users/*.md)
- Exclude usage data (usage_data.json)
- Exclude vector index (vectors.usearch)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-13 23:38:44 -07:00
parent ab3a5afd59
commit 8afff96bb5
16 changed files with 1096 additions and 244 deletions
--- a/memory_system.py
+++ b/memory_system.py
@@ -11,6 +11,9 @@ from datetime import datetime
 from pathlib import Path
 from typing import Dict, List, Optional

+import numpy as np
+from fastembed import TextEmbedding
+from usearch.index import Index
 from watchdog.events import FileSystemEventHandler
 from watchdog.observers import Observer

@@ -84,6 +87,26 @@ class MemorySystem:
        self._init_schema()
        self._init_special_files()

+        # Initialize embedding model (384-dim, local, $0 cost)
+        print("Loading FastEmbed model...")
+        self.embedding_model = TextEmbedding(
+            model_name="sentence-transformers/all-MiniLM-L6-v2"
+        )
+
+        # Initialize vector index
+        self.vector_index_path = self.workspace_dir / "vectors.usearch"
+        self.vector_index = Index(
+            ndim=384,  # all-MiniLM-L6-v2 dimensionality
+            metric="cos",  # cosine similarity
+        )
+
+        # Load existing index if present
+        if self.vector_index_path.exists():
+            self.vector_index.load(str(self.vector_index_path))
+            print(f"Loaded {len(self.vector_index)} vectors from index")
+        else:
+            print("Created new vector index")
+
        self.observer: Optional[Observer] = None
        self.dirty = False

@@ -112,7 +135,8 @@ class MemorySystem:
                start_line INTEGER NOT NULL,
                end_line INTEGER NOT NULL,
                text TEXT NOT NULL,
-                updated_at INTEGER NOT NULL
+                updated_at INTEGER NOT NULL,
+                vector_id INTEGER
            )
        """)

@@ -141,6 +165,14 @@ class MemorySystem:
            "CREATE INDEX IF NOT EXISTS idx_tasks_status ON tasks(status)"
        )

+        # Migration: Add vector_id column if it doesn't exist
+        try:
+            self.db.execute("ALTER TABLE chunks ADD COLUMN vector_id INTEGER")
+            print("Added vector_id column to chunks table")
+        except sqlite3.OperationalError:
+            # Column already exists
+            pass
+
        self.db.commit()

    def _init_special_files(self) -> None:
@@ -217,7 +249,20 @@ class MemorySystem:
        if existing and existing["hash"] == file_hash:
            return  # File unchanged

-        # Remove old chunks
+        # Remove old chunks and their vectors
+        old_chunks = self.db.execute(
+            "SELECT vector_id FROM chunks WHERE path = ?", (rel_path,)
+        ).fetchall()
+
+        # Remove vectors from index
+        for row in old_chunks:
+            if row["vector_id"] is not None:
+                try:
+                    self.vector_index.remove(row["vector_id"])
+                except (KeyError, IndexError):
+                    pass  # Vector might not exist in index, safe to ignore
+
+        # Remove from database
        self.db.execute(
            "DELETE FROM chunks WHERE path = ?", (rel_path,)
        )
@@ -235,11 +280,17 @@ class MemorySystem:
                f"{chunk['end_line']}:{chunk['text']}"
            )

+            # Generate embedding and store in vector index
+            embedding = self._generate_embedding(chunk["text"])
+            # Use hash of chunk_id as unique integer key for usearch
+            vector_id = int(hashlib.sha256(chunk_id.encode()).hexdigest()[:15], 16)
+            self.vector_index.add(vector_id, embedding)
+
            self.db.execute(
                """
                INSERT OR REPLACE INTO chunks
-                (id, path, start_line, end_line, text, updated_at)
-                VALUES (?, ?, ?, ?, ?, ?)
+                (id, path, start_line, end_line, text, updated_at, vector_id)
+                VALUES (?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    chunk_id,
@@ -248,6 +299,7 @@ class MemorySystem:
                    chunk["end_line"],
                    chunk["text"],
                    now,
+                    vector_id,
                ),
            )

@@ -274,6 +326,10 @@ class MemorySystem:
        )

        self.db.commit()
+
+        # Save vector index to disk
+        self.vector_index.save(str(self.vector_index_path))
+
        print(f"Indexed {rel_path} ({len(chunks)} chunks)")

    def sync(self) -> None:
@@ -305,6 +361,12 @@ class MemorySystem:
        sanitized = query.replace('"', '""')  # Escape double quotes
        return f'"{sanitized}"'

+    def _generate_embedding(self, text: str) -> np.ndarray:
+        """Generate 384-dim embedding using FastEmbed (local, $0 cost)."""
+        # embed() takes a batch and is drained with list(); one input text, so index 0
+        embeddings = list(self.embedding_model.embed([text]))
+        return embeddings[0]
+
    def search(self, query: str, max_results: int = 5) -> List[Dict]:
        """Search memory using full-text search."""
        # Sanitize query to prevent FTS5 injection
@@ -330,6 +392,154 @@ class MemorySystem:

        return [dict(row) for row in results]

+    def search_hybrid(self, query: str, max_results: int = 5) -> List[Dict]:
+        """
+        Hybrid search combining semantic (vector) and keyword (BM25) search.
+
+        Uses 0.7 vector similarity + 0.3 BM25 scoring for optimal retrieval.
+        """
+        if len(self.vector_index) == 0:
+            # No vectors yet, fall back to keyword search
+            return self.search(query, max_results)
+
+        # 1. Generate query embedding for semantic search
+        query_embedding = self._generate_embedding(query)
+
+        # 2. Get top vector matches (3x max_results, to leave room for re-ranking)
+        vector_matches = self.vector_index.search(
+            query_embedding, max_results * 3
+        )
+
+        # 3. Get BM25 keyword matches (same 3x over-fetch as the vector side)
+        safe_query = self._sanitize_fts5_query(query)
+        bm25_results = self.db.execute(
+            """
+            SELECT
+                chunks.id,
+                chunks.path,
+                chunks.start_line,
+                chunks.end_line,
+                chunks.vector_id,
+                snippet(chunks_fts, 0, '**', '**', '...', 64) as snippet,
+                bm25(chunks_fts) as bm25_score
+            FROM chunks_fts
+            JOIN chunks ON chunks.path = chunks_fts.path
+                AND chunks.start_line = chunks_fts.start_line
+            WHERE chunks_fts MATCH ?
+            LIMIT ?
+            """,
+            (safe_query, max_results * 3),
+        ).fetchall()
+
+        # 4. Normalize scores and combine
+        # Build lookup maps: vector_id -> similarity, chunk id -> BM25 row
+        vector_scores = {}
+        for match in vector_matches:
+            # each usearch match exposes .key and .distance
+            vector_id = int(match.key)
+            # Convert distance to similarity (cosine distance -> similarity)
+            similarity = 1 - match.distance
+            vector_scores[vector_id] = similarity
+
+        bm25_map = {}
+        for row in bm25_results:
+            bm25_map[row["id"]] = dict(row)
+
+        # Normalize BM25 scores (they're negative, lower is better)
+        if bm25_results:
+            bm25_values = [row["bm25_score"] for row in bm25_results]
+            min_bm25 = min(bm25_values)
+            max_bm25 = max(bm25_values)
+            bm25_range = max_bm25 - min_bm25 if max_bm25 != min_bm25 else 1
+
+            for chunk_id, chunk_data in bm25_map.items():
+                # Normalize to 0-1, then invert (lower BM25 is better)
+                normalized = (chunk_data["bm25_score"] - min_bm25) / bm25_range
+                bm25_map[chunk_id]["normalized_bm25"] = 1 - normalized
+        else:
+            # No BM25 results: normalized_bm25 falls back to 0.0 when scoring
+            pass
+
+        # 5. Combine scores: 0.7 vector + 0.3 BM25
+        combined_scores = {}
+
+        # Batch-fetch all chunks matching vector results in a single query
+        # instead of N separate queries (fixes N+1 query problem)
+        vector_id_list = [int(match.key) for match in vector_matches]
+        vector_chunk_map = {}  # vector_id -> chunk data
+        if vector_id_list:
+            placeholders = ",".join("?" * len(vector_id_list))
+            vector_chunks = self.db.execute(
+                f"SELECT * FROM chunks WHERE vector_id IN ({placeholders})",
+                vector_id_list,
+            ).fetchall()
+            for row in vector_chunks:
+                vector_chunk_map[row["vector_id"]] = dict(row)
+
+        # Collect all unique chunk IDs from both sources
+        all_chunk_ids = set()
+        for vid, chunk_data in vector_chunk_map.items():
+            all_chunk_ids.add(chunk_data["id"])
+        all_chunk_ids.update(bm25_map.keys())
+
+        # NOTE(review): same sources as all_chunk_ids, so missing_ids is always empty
+        chunks_we_have = {cd["id"] for cd in vector_chunk_map.values()}
+        chunks_we_have.update(bm25_map.keys())
+        missing_ids = all_chunk_ids - chunks_we_have
+
+        all_chunk_data = {}
+        # Index data we already have from vector query
+        for chunk_data in vector_chunk_map.values():
+            all_chunk_data[chunk_data["id"]] = chunk_data
+        # Index data from BM25 results
+        for chunk_id, bm25_data in bm25_map.items():
+            if chunk_id not in all_chunk_data:
+                all_chunk_data[chunk_id] = bm25_data
+
+        # Fetch any remaining missing chunks in one query (never runs, see note above)
+        if missing_ids:
+            placeholders = ",".join("?" * len(missing_ids))
+            missing_chunks = self.db.execute(
+                f"SELECT * FROM chunks WHERE id IN ({placeholders})",
+                list(missing_ids),
+            ).fetchall()
+            for row in missing_chunks:
+                all_chunk_data[row["id"]] = dict(row)
+
+        # Calculate combined scores (a source that missed the chunk contributes 0.0)
+        for chunk_id in all_chunk_ids:
+            chunk_data = all_chunk_data.get(chunk_id)
+            if not chunk_data:
+                continue
+
+            vector_id = chunk_data.get("vector_id")
+            vector_score = vector_scores.get(vector_id, 0.0) if vector_id else 0.0
+            bm25_score = bm25_map.get(chunk_id, {}).get("normalized_bm25", 0.0)
+
+            # Weighted combination: 70% semantic, 30% keyword
+            combined = 0.7 * vector_score + 0.3 * bm25_score
+
+            snippet_text = chunk_data.get("text", "")
+            combined_scores[chunk_id] = {
+                "path": chunk_data["path"],
+                "start_line": chunk_data["start_line"],
+                "end_line": chunk_data["end_line"],
+                "snippet": bm25_map.get(chunk_id, {}).get(
+                    "snippet",
+                    snippet_text[:64] + "..." if len(snippet_text) > 64 else snippet_text
+                ),
+                "score": combined,
+            }
+
+        # 6. Sort by combined score and return top results
+        sorted_results = sorted(
+            combined_scores.values(),
+            key=lambda x: x["score"],
+            reverse=True
+        )
+
+        return sorted_results[:max_results]
+
    def write_memory(self, content: str, daily: bool = True) -> None:
        """Write to memory file."""
        if daily:
@@ -595,6 +805,9 @@ class MemorySystem:
    def close(self) -> None:
        """Close database and cleanup."""
        self.stop_watching()
+        # Save vector index before closing (skipped when empty: any saved file is kept)
+        if len(self.vector_index) > 0:
+            self.vector_index.save(str(self.vector_index_path))
        self.db.close()