#!/usr/bin/env node
// backfill_return_classification.js
//
// Retroactively populates return_classification_json for existing anomaly log
// rows that have returned_count > 0 but an empty return classification.
//
// For each such row, looks at archive transitions whose to_state is RETURNED
// on that date, then finds the *prior* archive reason (the most recent
// transition to ARCHIVED for that same archive record at or before the return)
// to determine why the finding originally left — which tells us why it came
// back.
//
// Safe to run multiple times — rows that already carry a non-zero
// classification are left untouched; all other rows are recomputed.
//
// Usage: node backend/migrations/backfill_return_classification.js

'use strict';

const path = require('path');
const sqlite3 = require('sqlite3').verbose();

const DB_PATH = path.join(__dirname, '..', 'cve_database.db');

/**
 * Promise wrapper around `db.all`.
 *
 * @param {object} db - Open sqlite3 Database handle.
 * @param {string} sql - SQL text with `?` placeholders.
 * @param {Array} [params=[]] - Values bound to the placeholders.
 * @returns {Promise<Array<object>>} All result rows (empty array when none).
 */
function dbAll(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    db.all(sql, params, (err, rows) => {
      if (err) reject(err);
      else resolve(rows || []);
    });
  });
}

/**
 * Promise wrapper around `db.get`.
 *
 * @param {object} db - Open sqlite3 Database handle.
 * @param {string} sql - SQL text with `?` placeholders.
 * @param {Array} [params=[]] - Values bound to the placeholders.
 * @returns {Promise<object|undefined>} The first row, or undefined when none.
 */
function dbGet(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    db.get(sql, params, (err, row) => {
      if (err) reject(err);
      else resolve(row);
    });
  });
}

/**
 * Promise wrapper around `db.run`.
 *
 * @param {object} db - Open sqlite3 Database handle.
 * @param {string} sql - SQL text with `?` placeholders.
 * @param {Array} [params=[]] - Values bound to the placeholders.
 * @returns {Promise<object>} The statement context sqlite3 binds to `this`.
 */
function dbRun(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    // A regular function (not an arrow) is required: sqlite3 passes the
    // statement context as `this`.
    db.run(sql, params, function (err) {
      if (err) reject(err);
      else resolve(this);
    });
  });
}

// Opens an existing database read/write. OPEN_CREATE is deliberately omitted so
// a wrong path fails loudly instead of silently creating an empty database.
function openDatabase(filename) {
  return new Promise((resolve, reject) => {
    const db = new sqlite3.Database(filename, sqlite3.OPEN_READWRITE, (err) => {
      if (err) reject(err);
      else resolve(db);
    });
  });
}

// Closes the database handle and resolves once sqlite3 reports completion.
function dbClose(db) {
  return new Promise((resolve, reject) => {
    db.close((err) => {
      if (err) reject(err);
      else resolve();
    });
  });
}

// Returns true when the stored classification JSON holds at least one count > 0.
// Malformed JSON and non-object values (including the literal `null`) are
// treated as "no data" so the row gets backfilled.
function hasClassificationData(rawJson) {
  let parsed;
  try {
    parsed = JSON.parse(rawJson || '{}');
  } catch (_) {
    return false;
  }
  if (parsed === null || typeof parsed !== 'object') return false;
  return Object.values(parsed).some((v) => v > 0);
}

// Extracts the date portion from a timestamp string, accepting both "T" and
// space separators. Returns null when the value is not a usable string.
function toDateOnly(timestamp) {
  if (typeof timestamp !== 'string' || timestamp === '') return null;
  return timestamp.split('T')[0].split(' ')[0];
}

// Finds RETURNED transitions on `date`, falling back to a ±1 day window when
// the exact date has none. Newest first, so de-duplication keeps the latest
// return per archive record.
async function findReturnTransitions(db, date) {
  const exact = await dbAll(db,
    `SELECT archive_id, transitioned_at
       FROM ivanti_archive_transitions
      WHERE to_state = 'RETURNED'
        AND DATE(transitioned_at) = ?
      ORDER BY transitioned_at DESC`,
    [date]
  );
  if (exact.length > 0) return exact;

  // No transitions found for this date — try a wider window (±1 day)
  // since sync_timestamp and transitioned_at might not align exactly.
  return dbAll(db,
    `SELECT archive_id, transitioned_at
       FROM ivanti_archive_transitions
      WHERE to_state = 'RETURNED'
        AND DATE(transitioned_at) BETWEEN DATE(?, '-1 day') AND DATE(?, '+1 day')
      ORDER BY transitioned_at DESC`,
    [date, date]
  );
}

// Tallies, per known reason key, why each returned archive record had been
// archived before it returned. Reasons whose prefix (text before the first
// ':') is not a known key are left uncounted.
async function classifyReturns(db, returnTransitions) {
  const classification = { bu_reassignment: 0, severity_drift: 0, closed_on_platform: 0, decommissioned: 0 };
  const seen = new Set();

  for (const rt of returnTransitions) {
    if (seen.has(rt.archive_id)) continue;
    seen.add(rt.archive_id);

    // Bound the lookup by the return's own timestamp: without it, a finding
    // that was re-archived *after* returning would be classified by that
    // later, unrelated archive reason.
    const archiveTransition = await dbGet(db,
      `SELECT reason FROM ivanti_archive_transitions
        WHERE archive_id = ?
          AND to_state = 'ARCHIVED'
          AND transitioned_at <= ?
        ORDER BY transitioned_at DESC LIMIT 1`,
      [rt.archive_id, rt.transitioned_at]
    );

    if (archiveTransition && archiveTransition.reason) {
      const reasonKey = String(archiveTransition.reason).split(':')[0];
      if (Object.hasOwn(classification, reasonKey)) {
        classification[reasonKey]++;
      }
    }
  }

  return classification;
}

// Runs the backfill against an already-open database handle.
async function backfill(db) {
  // Candidate rows: anomaly log entries that recorded returned findings.
  const rows = await dbAll(db,
    `SELECT id, sync_timestamp, returned_count, return_classification_json
       FROM ivanti_sync_anomaly_log
      WHERE returned_count > 0
      ORDER BY sync_timestamp ASC`
  );

  if (rows.length === 0) {
    console.log('No anomaly log rows with returned findings found — nothing to backfill.');
    return;
  }

  let updated = 0;
  let skipped = 0;

  for (const row of rows) {
    // Skip if already has a non-empty classification
    if (hasClassificationData(row.return_classification_json)) {
      skipped++;
      continue;
    }

    const date = toDateOnly(row.sync_timestamp);
    if (date === null) {
      console.log(` row ${row.id}: missing or non-string sync_timestamp — skipping`);
      continue;
    }

    // NOTE(review): transitions are matched by calendar date only, so several
    // anomaly rows sharing one date would all receive the same tally — confirm
    // whether more than one sync per day can occur.
    const returnTransitions = await findReturnTransitions(db, date);
    if (returnTransitions.length === 0) {
      console.log(` ${date}: ${row.returned_count} returned but no matching transitions found — skipping`);
      continue;
    }

    const classification = await classifyReturns(db, returnTransitions);

    await dbRun(db,
      `UPDATE ivanti_sync_anomaly_log
          SET return_classification_json = ?
        WHERE id = ?`,
      [JSON.stringify(classification), row.id]
    );

    const parts = Object.entries(classification)
      .filter(([, v]) => v > 0)
      .map(([k, v]) => `${v} ${k}`);
    const breakdown = parts.length > 0 ? parts.join(', ') : 'unclassified';

    console.log(` ${date}: ${row.returned_count} returned — ${breakdown}`);
    updated++;
  }

  console.log(`\nBackfill complete: ${updated} rows updated, ${skipped} already had data.`);
}

/**
 * Entry point: opens the database, runs the backfill, and always closes the
 * handle — including when a query fails part-way through.
 *
 * @returns {Promise<void>} Rejects with the first database error encountered.
 */
async function main() {
  const db = await openDatabase(DB_PATH);
  try {
    await backfill(db);
  } finally {
    try {
      await dbClose(db);
    } catch (closeErr) {
      // Report but do not mask the error (if any) that is already propagating.
      console.error('Warning: failed to close database:', closeErr);
    }
  }
}

main().catch(err => {
  console.error('Fatal error:', err);
  process.exit(1);
});