feat: add backfill script for return classification on existing anomaly log rows
This commit is contained in:
156
backend/migrations/backfill_return_classification.js
Normal file
156
backend/migrations/backfill_return_classification.js
Normal file
@@ -0,0 +1,156 @@
|
||||
#!/usr/bin/env node
|
||||
// backfill_return_classification.js
//
// Retroactively populates return_classification_json for existing anomaly log
// rows that have returned_count > 0 but an empty return classification.
//
// For each such row, looks at archive transitions that went ARCHIVED → RETURNED
// on that date, then finds the *prior* archive reason (the most recent
// transition to ARCHIVED for that same archive record) to determine why the
// finding originally left — which tells us why it came back.
//
// Safe to run multiple times — only updates rows with empty classification.
//
// Usage: node backend/migrations/backfill_return_classification.js
|
||||
|
||||
const path = require('path');
|
||||
const sqlite3 = require('sqlite3').verbose();
|
||||
|
||||
// Path of the SQLite database file opened by main(): cve_database.db in the
// parent directory of this script's directory.
const DB_PATH = path.join(__dirname, '..', 'cve_database.db');
|
||||
|
||||
/**
 * Promise adapter for the callback-style `db.all(sql, params, callback)`.
 *
 * @param {object} db - Database handle exposing `all(sql, params, callback)`.
 * @param {string} sql - SQL statement to execute.
 * @param {Array} [params=[]] - Values bound to the statement's placeholders.
 * @returns {Promise<Array>} Resolves with the rows passed to the callback, or
 *   an empty array when the callback receives a falsy row set.
 *   Rejects with the error passed to the callback.
 */
function dbAll(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    db.all(sql, params, (err, rows) => {
      if (err) {
        reject(err);
        return;
      }
      // Normalise a missing result set so callers can always read `.length`.
      resolve(rows || []);
    });
  });
}
|
||||
|
||||
/**
 * Promise adapter for the callback-style `db.get(sql, params, callback)`.
 *
 * @param {object} db - Database handle exposing `get(sql, params, callback)`.
 * @param {string} sql - SQL statement to execute.
 * @param {Array} [params=[]] - Values bound to the statement's placeholders.
 * @returns {Promise<*>} Resolves with whatever row value the callback receives
 *   (passed through unchanged, so it may be `undefined`).
 *   Rejects with the error passed to the callback.
 */
function dbGet(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    db.get(sql, params, (err, row) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(row);
    });
  });
}
|
||||
|
||||
/**
 * Promise adapter for the callback-style `db.run(sql, params, callback)`.
 *
 * @param {object} db - Database handle exposing `run(sql, params, callback)`.
 * @param {string} sql - SQL statement to execute.
 * @param {Array} [params=[]] - Values bound to the statement's placeholders.
 * @returns {Promise<object>} Resolves with the `this` value the callback was
 *   invoked with. Rejects with the error passed to the callback.
 */
function dbRun(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    // Must stay a `function` expression (not an arrow): the resolved value is
    // the callback's own `this`, which an arrow function would not receive.
    db.run(sql, params, function onRun(err) {
      if (err) {
        reject(err);
        return;
      }
      resolve(this);
    });
  });
}
|
||||
|
||||
/**
 * Reports whether a stored return_classification_json value already holds at
 * least one positive count.
 *
 * @param {?string} rawJson - Column value; may be null, empty or malformed.
 * @returns {boolean} True only when the JSON parses to an object containing a
 *   value greater than zero.
 */
function hasReturnClassification(rawJson) {
  let parsed;
  try {
    parsed = JSON.parse(rawJson || '{}');
  } catch (_) {
    // Deliberate best-effort: malformed JSON counts as "no classification",
    // so the row is rebuilt instead of aborting the whole backfill.
    return false;
  }
  // JSON such as `null` parses successfully but has no entries to inspect.
  if (parsed === null || typeof parsed !== 'object') {
    return false;
  }
  return Object.values(parsed).some((count) => count > 0);
}

/**
 * Extracts the date portion of a sync timestamp, i.e. everything before the
 * first `T` or space.
 *
 * @param {*} syncTimestamp - Raw sync_timestamp column value.
 * @returns {?string} The date portion, or null when the value is not a
 *   non-empty string.
 */
function extractSyncDate(syncTimestamp) {
  if (typeof syncTimestamp !== 'string') {
    return null;
  }
  const date = syncTimestamp.split('T')[0].split(' ')[0];
  return date || null;
}

/**
 * Loads the transitions to RETURNED recorded on the given date, falling back
 * to a ±1 day window when none match exactly (sync_timestamp and
 * transitioned_at might not align exactly).
 *
 * @param {object} db - Open database handle.
 * @param {string} date - Date portion of the anomaly row's sync timestamp.
 * @returns {Promise<Array<{archive_id: *, transitioned_at: *}>>} Matching
 *   transitions, oldest first; empty when neither window matches.
 */
async function findReturnTransitions(db, date) {
  const sameDay = await dbAll(db,
    `SELECT archive_id, transitioned_at
       FROM ivanti_archive_transitions
      WHERE to_state = 'RETURNED'
        AND DATE(transitioned_at) = ?
      ORDER BY transitioned_at ASC`,
    [date]
  );
  if (sameDay.length > 0) {
    return sameDay;
  }

  return dbAll(db,
    `SELECT archive_id, transitioned_at
       FROM ivanti_archive_transitions
      WHERE to_state = 'RETURNED'
        AND DATE(transitioned_at) BETWEEN DATE(?, '-1 day') AND DATE(?, '+1 day')
      ORDER BY transitioned_at ASC`,
    [date, date]
  );
}

/**
 * Counts, per archive reason, the findings that returned.
 *
 * Each archive record is counted once. Its reason is taken from the most
 * recent transition to ARCHIVED at or before the return itself, so a finding
 * that was archived again after returning is still attributed to the reason
 * it originally left.
 * NOTE(review): the time bound compares transitioned_at values as stored;
 * this assumes the column uses one consistent, sortable format — confirm.
 *
 * @param {object} db - Open database handle.
 * @param {Array<{archive_id: *, transitioned_at: *}>} returnTransitions
 * @returns {Promise<object>} Counts keyed by bu_reassignment, severity_drift,
 *   closed_on_platform and decommissioned.
 */
async function classifyReturns(db, returnTransitions) {
  const classification = { bu_reassignment: 0, severity_drift: 0, closed_on_platform: 0, decommissioned: 0 };
  const seen = new Set();

  for (const transition of returnTransitions) {
    const archiveId = transition.archive_id;
    if (seen.has(archiveId)) {
      continue;
    }
    seen.add(archiveId);

    const archiveTransition = await dbGet(db,
      `SELECT reason FROM ivanti_archive_transitions
        WHERE archive_id = ? AND to_state = 'ARCHIVED'
          AND transitioned_at <= ?
        ORDER BY transitioned_at DESC LIMIT 1`,
      [archiveId, transition.transitioned_at]
    );

    if (!archiveTransition || typeof archiveTransition.reason !== 'string') {
      continue;
    }

    // The reason key is the text before the first ':'.
    const reasonKey = archiveTransition.reason.split(':')[0];
    // Own-property check: `in` would also match inherited names such as
    // "toString" and corrupt the counts.
    if (Object.prototype.hasOwnProperty.call(classification, reasonKey)) {
      classification[reasonKey]++;
    }
  }

  return classification;
}

/**
 * Backfills return_classification_json on ivanti_sync_anomaly_log rows that
 * have returned_count > 0 but no positive classification count yet.
 *
 * Rows that already carry a positive count are left untouched, so the script
 * can be re-run. The database handle is closed on every exit path, including
 * when a query fails.
 *
 * @returns {Promise<void>} Rejects with the first database error encountered.
 */
async function main() {
  const db = new sqlite3.Database(DB_PATH);

  try {
    // Find anomaly log rows that report returned findings.
    const rows = await dbAll(db,
      `SELECT id, sync_timestamp, returned_count, return_classification_json
         FROM ivanti_sync_anomaly_log
        WHERE returned_count > 0
        ORDER BY sync_timestamp ASC`
    );

    if (rows.length === 0) {
      console.log('No anomaly log rows with returned findings found — nothing to backfill.');
      return;
    }

    let updated = 0;
    let skipped = 0;

    for (const row of rows) {
      if (hasReturnClassification(row.return_classification_json)) {
        skipped++;
        continue;
      }

      const date = extractSyncDate(row.sync_timestamp);
      if (date === null) {
        console.log(` row ${row.id}: ${row.returned_count} returned but sync_timestamp is missing — skipping`);
        continue;
      }

      const returnTransitions = await findReturnTransitions(db, date);
      if (returnTransitions.length === 0) {
        console.log(` ${date}: ${row.returned_count} returned but no matching transitions found — skipping`);
        continue;
      }

      const classification = await classifyReturns(db, returnTransitions);

      await dbRun(db,
        `UPDATE ivanti_sync_anomaly_log
            SET return_classification_json = ?
          WHERE id = ?`,
        [JSON.stringify(classification), row.id]
      );

      const parts = Object.entries(classification)
        .filter(([, count]) => count > 0)
        .map(([reason, count]) => `${count} ${reason}`);
      const breakdown = parts.length > 0 ? parts.join(', ') : 'unclassified';

      console.log(` ${date}: ${row.returned_count} returned — ${breakdown}`);
      updated++;
    }

    console.log(`\nBackfill complete: ${updated} rows updated, ${skipped} already had data.`);
  } finally {
    // Release the handle even when a query rejects part-way through.
    db.close();
  }
}
|
||||
|
||||
// Script entry point: run the backfill; on any rejection, log the error and
// terminate the process with exit status 1.
main().catch((err) => {
  console.error('Fatal error:', err);
  process.exit(1);
});
|
||||
Reference in New Issue
Block a user