Add API usage tracking and dynamic task reloading

Features:
- Usage tracking system (usage_tracker.py)
  - Tracks input/output tokens per API call
  - Calculates costs with support for cache pricing
  - Stores data in usage_data.json (gitignored)
  - Integrated into llm_interface.py

- Dynamic task scheduler reloading
  - Auto-detects YAML changes every 60s
  - No restart needed for new tasks
  - reload_tasks() method for manual refresh

- Example cost tracking scheduled task
  - Daily API usage report
  - Budget tracking ($5/month target)
  - Disabled by default in scheduled_tasks.yaml

Improvements:
- Fixed tool_use/tool_result pair splitting bug (CRITICAL)
- Added thread safety to agent.chat()
- Fixed N+1 query problem in hybrid search
- Optimized database batch queries
- Added conversation history pruning (50 messages max)

Updated .gitignore:
- Exclude user profiles (memory_workspace/users/*.md)
- Exclude usage data (usage_data.json)
- Exclude vector index (vectors.usearch)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-13 23:38:44 -07:00
parent ab3a5afd59
commit 8afff96bb5
16 changed files with 1096 additions and 244 deletions

View File

@@ -11,6 +11,9 @@ from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional
import numpy as np
from fastembed import TextEmbedding
from usearch.index import Index
from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer
@@ -84,6 +87,26 @@ class MemorySystem:
self._init_schema()
self._init_special_files()
# Initialize embedding model (384-dim, local, $0 cost)
print("Loading FastEmbed model...")
self.embedding_model = TextEmbedding(
model_name="sentence-transformers/all-MiniLM-L6-v2"
)
# Initialize vector index
self.vector_index_path = self.workspace_dir / "vectors.usearch"
self.vector_index = Index(
ndim=384, # all-MiniLM-L6-v2 dimensionality
metric="cos", # cosine similarity
)
# Load existing index if present
if self.vector_index_path.exists():
self.vector_index.load(str(self.vector_index_path))
print(f"Loaded {len(self.vector_index)} vectors from index")
else:
print("Created new vector index")
self.observer: Optional[Observer] = None
self.dirty = False
@@ -112,7 +135,8 @@ class MemorySystem:
start_line INTEGER NOT NULL,
end_line INTEGER NOT NULL,
text TEXT NOT NULL,
updated_at INTEGER NOT NULL
updated_at INTEGER NOT NULL,
vector_id INTEGER
)
""")
@@ -141,6 +165,14 @@ class MemorySystem:
"CREATE INDEX IF NOT EXISTS idx_tasks_status ON tasks(status)"
)
# Migration: Add vector_id column if it doesn't exist
try:
self.db.execute("ALTER TABLE chunks ADD COLUMN vector_id INTEGER")
print("Added vector_id column to chunks table")
except sqlite3.OperationalError:
# Column already exists
pass
self.db.commit()
def _init_special_files(self) -> None:
@@ -217,7 +249,20 @@ class MemorySystem:
if existing and existing["hash"] == file_hash:
return # File unchanged
# Remove old chunks
# Remove old chunks and their vectors
old_chunks = self.db.execute(
"SELECT vector_id FROM chunks WHERE path = ?", (rel_path,)
).fetchall()
# Remove vectors from index
for row in old_chunks:
if row["vector_id"] is not None:
try:
self.vector_index.remove(row["vector_id"])
except (KeyError, IndexError):
pass # Vector might not exist in index, safe to ignore
# Remove from database
self.db.execute(
"DELETE FROM chunks WHERE path = ?", (rel_path,)
)
@@ -235,11 +280,17 @@ class MemorySystem:
f"{chunk['end_line']}:{chunk['text']}"
)
# Generate embedding and store in vector index
embedding = self._generate_embedding(chunk["text"])
# Use hash of chunk_id as unique integer key for usearch
vector_id = int(hashlib.sha256(chunk_id.encode()).hexdigest()[:15], 16)
self.vector_index.add(vector_id, embedding)
self.db.execute(
"""
INSERT OR REPLACE INTO chunks
(id, path, start_line, end_line, text, updated_at)
VALUES (?, ?, ?, ?, ?, ?)
(id, path, start_line, end_line, text, updated_at, vector_id)
VALUES (?, ?, ?, ?, ?, ?, ?)
""",
(
chunk_id,
@@ -248,6 +299,7 @@ class MemorySystem:
chunk["end_line"],
chunk["text"],
now,
vector_id,
),
)
@@ -274,6 +326,10 @@ class MemorySystem:
)
self.db.commit()
# Save vector index to disk
self.vector_index.save(str(self.vector_index_path))
print(f"Indexed {rel_path} ({len(chunks)} chunks)")
def sync(self) -> None:
@@ -305,6 +361,12 @@ class MemorySystem:
sanitized = query.replace('"', '""') # Escape double quotes
return f'"{sanitized}"'
def _generate_embedding(self, text: str) -> np.ndarray:
    """Embed a single piece of text with the local FastEmbed model.

    Args:
        text: The text to embed.

    Returns:
        The embedding produced for ``text`` (384 dimensions for the
        all-MiniLM-L6-v2 model this class is configured with).
    """
    # embed() works on batches and yields its results lazily, so submit a
    # one-item batch, materialise the output and hand back its only entry.
    batch = [text]
    vectors = list(self.embedding_model.embed(batch))
    return vectors[0]
def search(self, query: str, max_results: int = 5) -> List[Dict]:
"""Search memory using full-text search."""
# Sanitize query to prevent FTS5 injection
@@ -330,6 +392,154 @@ class MemorySystem:
return [dict(row) for row in results]
def search_hybrid(self, query: str, max_results: int = 5) -> List[Dict]:
    """
    Hybrid search combining semantic (vector) and keyword (BM25) search.

    Candidates are gathered from both the vector index and the FTS5 index
    (``max_results * 3`` from each, to leave room for re-ranking) and are
    scored as ``0.7 * vector_similarity + 0.3 * normalized_bm25``.

    Args:
        query: Free-text search query.
        max_results: Maximum number of results to return.

    Returns:
        Up to ``max_results`` dicts with the keys ``path``, ``start_line``,
        ``end_line``, ``snippet`` and ``score``, best match first. Falls
        back to plain keyword search while the vector index is empty, and
        returns an empty list when ``max_results`` is not positive.
    """
    if max_results <= 0:
        return []

    if len(self.vector_index) == 0:
        # No vectors yet, fall back to keyword search
        return self.search(query, max_results)

    candidate_count = max_results * 3

    # 1. Semantic candidates. Each match exposes ``.key`` (the vector id
    #    stored in chunks.vector_id) and ``.distance``; cosine distance is
    #    turned into a similarity and stored as a plain Python float.
    query_embedding = self._generate_embedding(query)
    vector_matches = self.vector_index.search(query_embedding, candidate_count)
    vector_scores = {
        int(match.key): 1.0 - float(match.distance)
        for match in vector_matches
    }

    # 2. Keyword candidates. ORDER BY is required so that LIMIT keeps the
    #    best-ranked rows instead of an arbitrary subset of the matches
    #    (bm25() is lower for better matches, hence ascending order).
    safe_query = self._sanitize_fts5_query(query)
    bm25_results = self.db.execute(
        """
        SELECT
            chunks.id,
            chunks.path,
            chunks.start_line,
            chunks.end_line,
            chunks.vector_id,
            snippet(chunks_fts, 0, '**', '**', '...', 64) as snippet,
            bm25(chunks_fts) as bm25_score
        FROM chunks_fts
        JOIN chunks ON chunks.path = chunks_fts.path
            AND chunks.start_line = chunks_fts.start_line
        WHERE chunks_fts MATCH ?
        ORDER BY bm25_score
        LIMIT ?
        """,
        (safe_query, candidate_count),
    ).fetchall()
    bm25_map = {row["id"]: dict(row) for row in bm25_results}

    # 3. Min-max normalize BM25 to 0-1 and invert it so 1.0 is the best
    #    keyword match. A zero spread (single row / all equal) maps to 1.0.
    if bm25_results:
        raw_scores = [row["bm25_score"] for row in bm25_results]
        best = min(raw_scores)
        spread = (max(raw_scores) - best) or 1
        for entry in bm25_map.values():
            entry["normalized_bm25"] = 1 - (entry["bm25_score"] - best) / spread

    # 4. Resolve every vector match to its chunk row with one batched query
    #    rather than one query per match.
    candidates = {}  # chunk id -> chunk data, in deterministic order
    if vector_scores:
        vector_ids = list(vector_scores)
        placeholders = ",".join("?" * len(vector_ids))
        vector_chunks = self.db.execute(
            f"SELECT * FROM chunks WHERE vector_id IN ({placeholders})",
            vector_ids,
        ).fetchall()
        for row in vector_chunks:
            candidates[row["id"]] = dict(row)

    # Keyword-only hits reuse the row already returned by the FTS query.
    for chunk_id, bm25_data in bm25_map.items():
        candidates.setdefault(chunk_id, bm25_data)

    # 5. Combine scores: 70% semantic, 30% keyword
    scored = []
    for chunk_id, chunk_data in candidates.items():
        # A missing/None vector id simply finds no entry. Do not test the
        # id for truthiness: 0 is a legitimate key.
        vector_score = vector_scores.get(chunk_data.get("vector_id"), 0.0)
        bm25_entry = bm25_map.get(chunk_id, {})
        bm25_score = bm25_entry.get("normalized_bm25", 0.0)

        # Prefer the highlighted FTS snippet; otherwise show the start of
        # the chunk text.
        text = chunk_data.get("text", "")
        fallback_snippet = text[:64] + "..." if len(text) > 64 else text

        scored.append(
            {
                "path": chunk_data["path"],
                "start_line": chunk_data["start_line"],
                "end_line": chunk_data["end_line"],
                "snippet": bm25_entry.get("snippet", fallback_snippet),
                "score": 0.7 * vector_score + 0.3 * bm25_score,
            }
        )

    # 6. Sort by combined score (stable, so ties keep candidate order) and
    #    return the top results.
    scored.sort(key=lambda item: item["score"], reverse=True)
    return scored[:max_results]
def write_memory(self, content: str, daily: bool = True) -> None:
"""Write to memory file."""
if daily:
@@ -595,6 +805,9 @@ class MemorySystem:
def close(self) -> None:
    """Close database and cleanup.

    Stops the file watcher, saves the vector index to
    ``self.vector_index_path`` when it holds at least one vector, and then
    closes the database connection. The connection is closed even if
    stopping the watcher or saving the index raises; that exception still
    propagates to the caller.
    """
    try:
        self.stop_watching()
        # Save vector index before closing
        if len(self.vector_index) > 0:
            self.vector_index.save(str(self.vector_index_path))
    finally:
        # Always release the database handle, even after a failed save.
        self.db.close()