feat: add backfill script for return classification on existing anomaly log rows

This commit is contained in:
root
2026-05-01 17:27:49 +00:00
parent 15abf8bae4
commit 3202b0707c

View File

@@ -0,0 +1,156 @@
#!/usr/bin/env node
// backfill_return_classification.js
//
// Retroactively populates return_classification_json for existing anomaly log
// rows that have returned_count > 0 but an empty return classification.
//
// For each such row, looks at archive transitions that went ARCHIVED → RETURNED
// on that date, then finds the *prior* archive reason (the most recent
// transition to ARCHIVED for that same archive record) to determine why the
// finding originally left — which tells us why it came back.
//
// Safe to run multiple times — only updates rows with empty classification.
//
// Usage: node backend/migrations/backfill_return_classification.js
const path = require('path');
const sqlite3 = require('sqlite3').verbose();
const DB_PATH = path.join(__dirname, '..', 'cve_database.db');
/**
 * Promise adapter for the callback-style `db.all`.
 *
 * @param {object} db - handle exposing `all(sql, params, callback)`
 * @param {string} sql - statement text to execute
 * @param {Array} [params=[]] - values bound to the statement's placeholders
 * @returns {Promise<Array>} resolves with the rows handed to the callback, or
 *   an empty array when the callback receives a falsy rows value; rejects with
 *   the callback's error
 */
function dbAll(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    const onRows = (error, resultRows) => {
      if (error) {
        reject(error);
        return;
      }
      // Normalise "no rows" to an empty array so callers can always iterate.
      resolve(resultRows || []);
    };
    db.all(sql, params, onRows);
  });
}
/**
 * Promise adapter for the callback-style `db.get`.
 *
 * @param {object} db - handle exposing `get(sql, params, callback)`
 * @param {string} sql - statement text to execute
 * @param {Array} [params=[]] - values bound to the statement's placeholders
 * @returns {Promise<*>} resolves with whatever row value the callback receives
 *   (passed through unchanged, so it may be undefined); rejects with the
 *   callback's error
 */
function dbGet(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    const onRow = (error, resultRow) => {
      if (error) {
        reject(error);
        return;
      }
      resolve(resultRow);
    };
    db.get(sql, params, onRow);
  });
}
/**
 * Promise adapter for the callback-style `db.run`.
 *
 * @param {object} db - handle exposing `run(sql, params, callback)`
 * @param {string} sql - statement text to execute
 * @param {Array} [params=[]] - values bound to the statement's placeholders
 * @returns {Promise<object>} resolves with the `this` value the callback was
 *   invoked with; rejects with the callback's error
 */
function dbRun(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    // Must stay a `function` expression (not an arrow): the resolved value is
    // the `this` that the driver binds when it invokes the callback.
    db.run(sql, params, function onRunComplete(error) {
      if (error) {
        reject(error);
        return;
      }
      resolve(this);
    });
  });
}
/**
 * Backfills `return_classification_json` on `ivanti_sync_anomaly_log` rows
 * that report returned findings (`returned_count > 0`) but do not yet carry
 * any non-zero classification count.
 *
 * For each such row, the RETURNED transitions on the row's sync date are
 * collected (falling back to a ±1 day window when the exact date has none).
 * Every distinct returning archive record is then classified by the reason of
 * the most recent ARCHIVED transition recorded at or before its return.
 *
 * The database handle is closed on every exit path, including query failures.
 *
 * @returns {Promise<void>} resolves once every eligible row has been processed
 * @throws rejects with the underlying error if any query fails
 */
async function main() {
  const db = new sqlite3.Database(DB_PATH);
  try {
    // Candidate rows: anything that recorded returned findings. Rows that
    // already hold a classification are filtered out in the loop below.
    const rows = await dbAll(db,
      `SELECT id, sync_timestamp, returned_count, return_classification_json
       FROM ivanti_sync_anomaly_log
       WHERE returned_count > 0
       ORDER BY sync_timestamp ASC`
    );
    if (rows.length === 0) {
      console.log('No anomaly log rows with returned findings found — nothing to backfill.');
      return;
    }

    let updated = 0;
    let skipped = 0;
    for (const row of rows) {
      // Skip rows that already have at least one non-zero counter.
      let existing = {};
      try {
        existing = JSON.parse(row.return_classification_json || '{}');
      } catch (_) {
        // Malformed JSON is deliberately treated as "no classification yet"
        // so the row is re-derived and overwritten below.
      }
      // JSON.parse can yield null or a primitive; only inspect real objects.
      const hasData = existing !== null
        && typeof existing === 'object'
        && Object.values(existing).some((v) => v > 0);
      if (hasData) {
        skipped++;
        continue;
      }

      if (typeof row.sync_timestamp !== 'string' || row.sync_timestamp === '') {
        console.log(` row ${row.id}: missing or non-text sync_timestamp — skipping`);
        continue;
      }
      // Reduce the timestamp to its date part; handles both
      // "YYYY-MM-DDTHH:MM:SS" and "YYYY-MM-DD HH:MM:SS" layouts.
      const date = row.sync_timestamp.split('T')[0].split(' ')[0];

      // RETURNED transitions on this date. transitioned_at is carried along
      // so the archive-reason lookup can be bounded by the return time.
      let returnTransitions = await dbAll(db,
        `SELECT archive_id, transitioned_at
         FROM ivanti_archive_transitions
         WHERE to_state = 'RETURNED'
           AND DATE(transitioned_at) = ?
         ORDER BY transitioned_at ASC`,
        [date]
      );
      if (returnTransitions.length === 0) {
        // No transitions found for this date — try a wider window (±1 day)
        // since sync_timestamp and transitioned_at might not align exactly.
        returnTransitions = await dbAll(db,
          `SELECT archive_id, transitioned_at
           FROM ivanti_archive_transitions
           WHERE to_state = 'RETURNED'
             AND DATE(transitioned_at) BETWEEN DATE(?, '-1 day') AND DATE(?, '+1 day')
           ORDER BY transitioned_at ASC`,
          [date, date]
        );
        if (returnTransitions.length === 0) {
          console.log(` ${date}: ${row.returned_count} returned but no matching transitions found — skipping`);
          continue;
        }
      }

      // For each returned finding, look up the prior archive reason.
      const classification = { bu_reassignment: 0, severity_drift: 0, closed_on_platform: 0, decommissioned: 0 };
      const seen = new Set();
      for (const rt of returnTransitions) {
        // Count each archive record once; rows are ordered ascending, so the
        // earliest return in the window is the one that gets classified.
        if (seen.has(rt.archive_id)) continue;
        seen.add(rt.archive_id);

        // Most recent ARCHIVED transition at or before this return. Without
        // the upper bound, a finding that was archived again after returning
        // would be classified by that later, unrelated reason.
        const archiveTransition = await dbGet(db,
          `SELECT reason FROM ivanti_archive_transitions
           WHERE archive_id = ? AND to_state = 'ARCHIVED'
             AND transitioned_at <= ?
           ORDER BY transitioned_at DESC LIMIT 1`,
          [rt.archive_id, rt.transitioned_at]
        );
        if (archiveTransition && typeof archiveTransition.reason === 'string' && archiveTransition.reason) {
          // The bucket name is the part of the reason before the first ':'.
          const reasonKey = archiveTransition.reason.split(':')[0];
          // Own-property check: `in` would also match inherited names such as
          // "toString" and corrupt the counters.
          if (Object.prototype.hasOwnProperty.call(classification, reasonKey)) {
            classification[reasonKey]++;
          }
        }
      }

      const classificationJson = JSON.stringify(classification);
      await dbRun(db,
        `UPDATE ivanti_sync_anomaly_log
         SET return_classification_json = ?
         WHERE id = ?`,
        [classificationJson, row.id]
      );

      const parts = Object.entries(classification)
        .filter(([, v]) => v > 0)
        .map(([k, v]) => `${v} ${k}`);
      const breakdown = parts.length > 0 ? parts.join(', ') : 'unclassified';
      console.log(` ${date}: ${row.returned_count} returned — ${breakdown}`);
      updated++;
    }
    console.log(`\nBackfill complete: ${updated} rows updated, ${skipped} already had data.`);
  } finally {
    // Release the handle on success, early return, and query failure alike.
    db.close();
  }
}
// Script entry point: run the backfill; on rejection, print the error and
// terminate with a non-zero exit status.
main().catch((fatalError) => {
  console.error('Fatal error:', fatalError);
  process.exit(1);
});