Add API usage tracking and dynamic task reloading
Features:
- Usage tracking system (usage_tracker.py)
  - Tracks input/output tokens per API call
  - Calculates costs with support for cache pricing
  - Stores data in usage_data.json (gitignored)
  - Integrated into llm_interface.py
- Dynamic task scheduler reloading
  - Auto-detects YAML changes every 60s
  - No restart needed for new tasks
  - reload_tasks() method for manual refresh
- Example cost tracking scheduled task
  - Daily API usage report
  - Budget tracking ($5/month target)
  - Disabled by default in scheduled_tasks.yaml

Improvements:
- Fixed tool_use/tool_result pair splitting bug (CRITICAL)
- Added thread safety to agent.chat()
- Fixed N+1 query problem in hybrid search
- Optimized database batch queries
- Added conversation history pruning (50 messages max)

Updated .gitignore:
- Exclude user profiles (memory_workspace/users/*.md)
- Exclude usage data (usage_data.json)
- Exclude vector index (vectors.usearch)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
221
memory_system.py
221
memory_system.py
@@ -11,6 +11,9 @@ from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import numpy as np
|
||||
from fastembed import TextEmbedding
|
||||
from usearch.index import Index
|
||||
from watchdog.events import FileSystemEventHandler
|
||||
from watchdog.observers import Observer
|
||||
|
||||
@@ -84,6 +87,26 @@ class MemorySystem:
|
||||
self._init_schema()
|
||||
self._init_special_files()
|
||||
|
||||
# Initialize embedding model (384-dim, local, $0 cost)
|
||||
print("Loading FastEmbed model...")
|
||||
self.embedding_model = TextEmbedding(
|
||||
model_name="sentence-transformers/all-MiniLM-L6-v2"
|
||||
)
|
||||
|
||||
# Initialize vector index
|
||||
self.vector_index_path = self.workspace_dir / "vectors.usearch"
|
||||
self.vector_index = Index(
|
||||
ndim=384, # all-MiniLM-L6-v2 dimensionality
|
||||
metric="cos", # cosine similarity
|
||||
)
|
||||
|
||||
# Load existing index if present
|
||||
if self.vector_index_path.exists():
|
||||
self.vector_index.load(str(self.vector_index_path))
|
||||
print(f"Loaded {len(self.vector_index)} vectors from index")
|
||||
else:
|
||||
print("Created new vector index")
|
||||
|
||||
self.observer: Optional[Observer] = None
|
||||
self.dirty = False
|
||||
|
||||
@@ -112,7 +135,8 @@ class MemorySystem:
|
||||
start_line INTEGER NOT NULL,
|
||||
end_line INTEGER NOT NULL,
|
||||
text TEXT NOT NULL,
|
||||
updated_at INTEGER NOT NULL
|
||||
updated_at INTEGER NOT NULL,
|
||||
vector_id INTEGER
|
||||
)
|
||||
""")
|
||||
|
||||
@@ -141,6 +165,14 @@ class MemorySystem:
|
||||
"CREATE INDEX IF NOT EXISTS idx_tasks_status ON tasks(status)"
|
||||
)
|
||||
|
||||
# Migration: Add vector_id column if it doesn't exist
|
||||
try:
|
||||
self.db.execute("ALTER TABLE chunks ADD COLUMN vector_id INTEGER")
|
||||
print("Added vector_id column to chunks table")
|
||||
except sqlite3.OperationalError:
|
||||
# Column already exists
|
||||
pass
|
||||
|
||||
self.db.commit()
|
||||
|
||||
def _init_special_files(self) -> None:
|
||||
@@ -217,7 +249,20 @@ class MemorySystem:
|
||||
if existing and existing["hash"] == file_hash:
|
||||
return # File unchanged
|
||||
|
||||
# Remove old chunks
|
||||
# Remove old chunks and their vectors
|
||||
old_chunks = self.db.execute(
|
||||
"SELECT vector_id FROM chunks WHERE path = ?", (rel_path,)
|
||||
).fetchall()
|
||||
|
||||
# Remove vectors from index
|
||||
for row in old_chunks:
|
||||
if row["vector_id"] is not None:
|
||||
try:
|
||||
self.vector_index.remove(row["vector_id"])
|
||||
except (KeyError, IndexError):
|
||||
pass # Vector might not exist in index, safe to ignore
|
||||
|
||||
# Remove from database
|
||||
self.db.execute(
|
||||
"DELETE FROM chunks WHERE path = ?", (rel_path,)
|
||||
)
|
||||
@@ -235,11 +280,17 @@ class MemorySystem:
|
||||
f"{chunk['end_line']}:{chunk['text']}"
|
||||
)
|
||||
|
||||
# Generate embedding and store in vector index
|
||||
embedding = self._generate_embedding(chunk["text"])
|
||||
# Use hash of chunk_id as unique integer key for usearch
|
||||
vector_id = int(hashlib.sha256(chunk_id.encode()).hexdigest()[:15], 16)
|
||||
self.vector_index.add(vector_id, embedding)
|
||||
|
||||
self.db.execute(
|
||||
"""
|
||||
INSERT OR REPLACE INTO chunks
|
||||
(id, path, start_line, end_line, text, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
(id, path, start_line, end_line, text, updated_at, vector_id)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
chunk_id,
|
||||
@@ -248,6 +299,7 @@ class MemorySystem:
|
||||
chunk["end_line"],
|
||||
chunk["text"],
|
||||
now,
|
||||
vector_id,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -274,6 +326,10 @@ class MemorySystem:
|
||||
)
|
||||
|
||||
self.db.commit()
|
||||
|
||||
# Save vector index to disk
|
||||
self.vector_index.save(str(self.vector_index_path))
|
||||
|
||||
print(f"Indexed {rel_path} ({len(chunks)} chunks)")
|
||||
|
||||
def sync(self) -> None:
|
||||
@@ -305,6 +361,12 @@ class MemorySystem:
|
||||
sanitized = query.replace('"', '""') # Escape double quotes
|
||||
return f'"{sanitized}"'
|
||||
|
||||
def _generate_embedding(self, text: str) -> np.ndarray:
|
||||
"""Generate 384-dim embedding using FastEmbed (local, $0 cost)."""
|
||||
# FastEmbed returns a generator, get first (and only) result
|
||||
embeddings = list(self.embedding_model.embed([text]))
|
||||
return embeddings[0]
|
||||
|
||||
def search(self, query: str, max_results: int = 5) -> List[Dict]:
|
||||
"""Search memory using full-text search."""
|
||||
# Sanitize query to prevent FTS5 injection
|
||||
@@ -330,6 +392,154 @@ class MemorySystem:
|
||||
|
||||
return [dict(row) for row in results]
|
||||
|
||||
def search_hybrid(self, query: str, max_results: int = 5) -> List[Dict]:
    """
    Hybrid search combining semantic (vector) and keyword (BM25) search.

    Scores are blended as 0.7 * vector similarity + 0.3 * normalized BM25.
    Falls back to plain keyword search when no vectors are indexed yet.

    Args:
        query: Free-text search query.
        max_results: Maximum number of results to return.

    Returns:
        List of dicts with keys path, start_line, end_line, snippet, score,
        sorted by descending combined score.
    """
    if len(self.vector_index) == 0:
        # No vectors yet, fall back to keyword search
        return self.search(query, max_results)

    # 1. Semantic candidates: over-fetch (3x) so re-ranking has headroom.
    query_embedding = self._generate_embedding(query)
    vector_matches = self.vector_index.search(query_embedding, max_results * 3)

    # 2. Keyword candidates via FTS5/BM25.
    safe_query = self._sanitize_fts5_query(query)
    bm25_results = self.db.execute(
        """
        SELECT
            chunks.id,
            chunks.path,
            chunks.start_line,
            chunks.end_line,
            chunks.vector_id,
            snippet(chunks_fts, 0, '**', '**', '...', 64) as snippet,
            bm25(chunks_fts) as bm25_score
        FROM chunks_fts
        JOIN chunks ON chunks.path = chunks_fts.path
            AND chunks.start_line = chunks_fts.start_line
        WHERE chunks_fts MATCH ?
        LIMIT ?
        """,
        (safe_query, max_results * 3),
    ).fetchall()

    # 3. Vector scores: usearch returns (key, distance) pairs; convert
    # cosine distance to similarity.
    vector_scores = {int(m.key): 1 - m.distance for m in vector_matches}

    bm25_map = {row["id"]: dict(row) for row in bm25_results}

    # Normalize BM25 scores to [0, 1] and invert (raw BM25 is negative;
    # lower is better).
    if bm25_map:
        bm25_values = [d["bm25_score"] for d in bm25_map.values()]
        min_bm25, max_bm25 = min(bm25_values), max(bm25_values)
        bm25_range = max_bm25 - min_bm25 if max_bm25 != min_bm25 else 1
        for chunk_data in bm25_map.values():
            normalized = (chunk_data["bm25_score"] - min_bm25) / bm25_range
            chunk_data["normalized_bm25"] = 1 - normalized

    # 4. Batch-fetch chunk rows for the vector hits in a single query
    # (avoids the N+1 pattern of one SELECT per match).
    vector_id_list = list(vector_scores)
    vector_chunk_map: Dict[int, Dict] = {}
    if vector_id_list:
        placeholders = ",".join("?" * len(vector_id_list))
        rows = self.db.execute(
            f"SELECT * FROM chunks WHERE vector_id IN ({placeholders})",
            vector_id_list,
        ).fetchall()
        vector_chunk_map = {row["vector_id"]: dict(row) for row in rows}

    # 5. Merge candidate sets. Every candidate comes from either the
    # vector fetch or the BM25 fetch, so no further DB round-trip is
    # needed. (The previous "missing ids" follow-up query was dead code:
    # the missing set was always empty by construction.)
    all_chunk_data: Dict = {}
    for chunk_data in vector_chunk_map.values():
        all_chunk_data[chunk_data["id"]] = chunk_data
    for chunk_id, bm25_data in bm25_map.items():
        all_chunk_data.setdefault(chunk_id, bm25_data)

    # 6. Weighted combination: 70% semantic, 30% keyword.
    combined_scores = {}
    for chunk_id, chunk_data in all_chunk_data.items():
        vector_id = chunk_data.get("vector_id")
        # "is not None" rather than truthiness: a vector_id of 0 is a
        # falsy but perfectly valid key.
        vector_score = (
            vector_scores.get(vector_id, 0.0) if vector_id is not None else 0.0
        )
        bm25_score = bm25_map.get(chunk_id, {}).get("normalized_bm25", 0.0)
        combined = 0.7 * vector_score + 0.3 * bm25_score

        # Prefer the FTS5-highlighted snippet; otherwise truncate raw text.
        text = chunk_data.get("text", "")
        fallback_snippet = text[:64] + "..." if len(text) > 64 else text
        combined_scores[chunk_id] = {
            "path": chunk_data["path"],
            "start_line": chunk_data["start_line"],
            "end_line": chunk_data["end_line"],
            "snippet": bm25_map.get(chunk_id, {}).get("snippet", fallback_snippet),
            "score": combined,
        }

    # 7. Highest combined score first.
    sorted_results = sorted(
        combined_scores.values(),
        key=lambda x: x["score"],
        reverse=True,
    )
    return sorted_results[:max_results]
|
||||
|
||||
def write_memory(self, content: str, daily: bool = True) -> None:
|
||||
"""Write to memory file."""
|
||||
if daily:
|
||||
@@ -595,6 +805,9 @@ class MemorySystem:
|
||||
def close(self) -> None:
    """Close database and cleanup."""
    self.stop_watching()
    index = self.vector_index
    # Persist the vector index before shutdown, but only if it holds data.
    if len(index) > 0:
        index.save(str(self.vector_index_path))
    self.db.close()
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user