Add backfill script for anomaly log historical data
This commit is contained in:
160
backend/migrations/backfill_anomaly_log.js
Normal file
160
backend/migrations/backfill_anomaly_log.js
Normal file
@@ -0,0 +1,160 @@
|
||||
#!/usr/bin/env node
// backfill_anomaly_log.js — One-time backfill of ivanti_sync_anomaly_log
//
// Synthesizes anomaly log entries from existing ivanti_archive_transitions
// and ivanti_counts_history data so the archive activity sparkline on the
// Findings Trend chart has historical data to display.
//
// Safe to run multiple times — the script does nothing if
// ivanti_sync_anomaly_log already contains rows.
//
// Usage: node backend/migrations/backfill_anomaly_log.js
|
||||
|
||||
const path = require('path');
|
||||
const sqlite3 = require('sqlite3').verbose();
|
||||
|
||||
// Location of the database file opened by main(): `cve_database.db` in the
// parent of this script's directory.
const DB_PATH = path.join(__dirname, '..', 'cve_database.db');
|
||||
|
||||
/**
 * Promise wrapper around the callback-style `db.all`.
 *
 * @param {object} db - Handle exposing `all(sql, params, callback)`.
 * @param {string} sql - Statement to execute.
 * @param {Array} [params=[]] - Values bound to the statement's placeholders.
 * @returns {Promise<Array>} Resolves with the rows passed to the callback, or
 *   an empty array when the callback receives a falsy rows value. Rejects with
 *   the callback's error.
 */
function dbAll(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    db.all(sql, params, (err, rows) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(rows || []);
    });
  });
}
|
||||
|
||||
/**
 * Promise wrapper around the callback-style `db.get`.
 *
 * @param {object} db - Handle exposing `get(sql, params, callback)`.
 * @param {string} sql - Statement to execute.
 * @param {Array} [params=[]] - Values bound to the statement's placeholders.
 * @returns {Promise<*>} Resolves with whatever row value the callback receives
 *   (passed through unchanged). Rejects with the callback's error.
 */
function dbGet(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    db.get(sql, params, (err, row) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(row);
    });
  });
}
|
||||
|
||||
/**
 * Promise wrapper around the callback-style `db.run`.
 *
 * @param {object} db - Handle exposing `run(sql, params, callback)`.
 * @param {string} sql - Statement to execute.
 * @param {Array} [params=[]] - Values bound to the statement's placeholders.
 * @returns {Promise<object>} Resolves with the callback's `this` value, so
 *   callers can read whatever the driver attaches to it. Rejects with the
 *   callback's error.
 */
function dbRun(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    // Must stay a `function` expression: the resolved value is the callback's
    // own `this`, which an arrow function would not receive.
    db.run(sql, params, function onRun(err) {
      if (err) {
        reject(err);
        return;
      }
      resolve(this);
    });
  });
}
|
||||
|
||||
// A backfilled entry is flagged significant when more than this many findings
// were archived on that date.
const SIGNIFICANT_ARCHIVE_THRESHOLD = 5;

/**
 * Collapse grouped archive-transition rows into one summary per date.
 *
 * @param {Array<{date: ?string, to_state: string, cnt: number}>} transitions
 *   Rows as returned by the transitions query in main().
 * @returns {{byDate: Map<string, {archived: number, returned: number, classification: object}>, undated: number}}
 *   `byDate` holds one summary per non-null date; `undated` is the number of
 *   transitions skipped because their date was NULL.
 */
function summarizeTransitionsByDate(transitions) {
  const byDate = new Map();
  let undated = 0;

  for (const t of transitions) {
    // DATE() yields NULL for missing/unparseable timestamps; such rows cannot
    // be turned into a meaningful sync_timestamp, so they are skipped.
    if (t.date == null) {
      undated += t.cnt;
      continue;
    }

    let entry = byDate.get(t.date);
    if (!entry) {
      entry = { archived: 0, returned: 0, classification: {} };
      byDate.set(t.date, entry);
    }

    if (t.to_state === 'ARCHIVED') {
      entry.archived += t.cnt;
      // All pre-feature transitions have reason 'severity_score_drift'
      // but from the investigation we know the 04/24 batch was mostly
      // BU reassignment. We can't retroactively classify without the
      // Ivanti API, so we label them as 'unclassified' (pre-feature).
      entry.classification.unclassified = (entry.classification.unclassified || 0) + t.cnt;
    } else if (t.to_state === 'RETURNED') {
      entry.returned += t.cnt;
    }
    // CLOSED transitions are not archive events — they're findings
    // confirmed in the closed set, so we don't count them as archived.
  }

  return { byDate, undated };
}

/**
 * Compute, for each snapshot date, the change in open/closed counts relative
 * to the immediately preceding snapshot date (not necessarily the previous
 * calendar day).
 *
 * @param {Array<{date: ?string, open_count: number, closed_count: number}>} countsRows
 *   One row per date, ordered by date ascending.
 * @returns {Map<string, {open: number, closed: number}>} Deltas keyed by date.
 *   The earliest date has no predecessor and therefore no entry.
 */
function computeCountDeltas(countsRows) {
  const dated = countsRows.filter((row) => row.date != null);
  const deltas = new Map();

  for (let i = 1; i < dated.length; i++) {
    const prev = dated[i - 1];
    const cur = dated[i];
    deltas.set(cur.date, {
      open: cur.open_count - prev.open_count,
      closed: cur.closed_count - prev.closed_count,
    });
  }

  return deltas;
}

/**
 * Backfill ivanti_sync_anomaly_log from ivanti_archive_transitions and
 * ivanti_counts_history.
 *
 * Does nothing (apart from printing a hint) when the anomaly log already has
 * rows. Otherwise inserts one row per date that has transitions, inside a
 * single transaction so a failure cannot leave a partial backfill behind that
 * the "already has rows" guard would then refuse to complete.
 *
 * @returns {Promise<void>} Rejects with the first database error encountered.
 */
async function main() {
  const db = new sqlite3.Database(DB_PATH);

  try {
    // Check if anomaly log already has data
    const existing = await dbGet(db, 'SELECT COUNT(*) as cnt FROM ivanti_sync_anomaly_log');
    if (existing.cnt > 0) {
      console.log(`ivanti_sync_anomaly_log already has ${existing.cnt} rows — skipping backfill.`);
      console.log('To force re-run, delete existing rows first:');
      console.log('  sqlite3 backend/cve_database.db "DELETE FROM ivanti_sync_anomaly_log;"');
      return;
    }

    // Get archive transitions grouped by date
    const transitions = await dbAll(db,
      `SELECT DATE(transitioned_at) as date,
              to_state,
              reason,
              COUNT(*) as cnt
       FROM ivanti_archive_transitions
       GROUP BY date, to_state, reason
       ORDER BY date`
    );

    // Get counts history (last snapshot per day) for delta computation
    const countsRows = await dbAll(db,
      `SELECT date, open_count, closed_count FROM (
         SELECT DATE(recorded_at) AS date,
                open_count, closed_count,
                ROW_NUMBER() OVER (
                  PARTITION BY DATE(recorded_at)
                  ORDER BY recorded_at DESC
                ) AS rn
         FROM ivanti_counts_history
       ) WHERE rn = 1
       ORDER BY date ASC`
    );

    const { byDate, undated } = summarizeTransitionsByDate(transitions);
    if (undated > 0) {
      console.warn(`Skipping ${undated} transition(s) with no usable transitioned_at date.`);
    }

    const deltas = computeCountDeltas(countsRows);
    const dates = [...byDate.keys()].sort();
    let inserted = 0;

    await dbRun(db, 'BEGIN');
    try {
      for (const date of dates) {
        const entry = byDate.get(date);
        // Dates without a snapshot, or with no earlier snapshot, get zero deltas.
        const delta = deltas.get(date);
        const openDelta = delta ? delta.open : 0;
        const closedDelta = delta ? delta.closed : 0;

        const isSignificant = entry.archived > SIGNIFICANT_ARCHIVE_THRESHOLD ? 1 : 0;
        const classificationJson = JSON.stringify(entry.classification);

        await dbRun(db,
          `INSERT INTO ivanti_sync_anomaly_log
             (sync_timestamp, open_count_delta, closed_count_delta,
              newly_archived_count, returned_count, classification_json, is_significant)
           VALUES (?, ?, ?, ?, ?, ?, ?)`,
          [
            `${date}T23:59:00`,
            openDelta,
            closedDelta,
            entry.archived,
            entry.returned,
            classificationJson,
            isSignificant,
          ]
        );
        inserted++;

        const sigLabel = isSignificant ? ' [SIGNIFICANT]' : '';
        console.log(`  ${date}: ${entry.archived} archived, ${entry.returned} returned, delta open=${openDelta} closed=${closedDelta}${sigLabel}`);
      }

      await dbRun(db, 'COMMIT');
    } catch (err) {
      // Undo every insert so the next run starts from an empty table again.
      try {
        await dbRun(db, 'ROLLBACK');
      } catch (rollbackErr) {
        console.error('Rollback failed:', rollbackErr);
      }
      throw err;
    }

    console.log(`\nBackfill complete: ${inserted} anomaly log entries created.`);
  } finally {
    // Always release the handle; a close failure is reported but must not
    // mask the error (if any) that is already propagating.
    await new Promise((resolve) => {
      db.close((closeErr) => {
        if (closeErr) {
          console.error('Failed to close database:', closeErr);
        }
        resolve();
      });
    });
  }
}
|
||||
|
||||
// Script entry point: run the backfill; on any rejection, report it and exit
// with a non-zero status.
main().catch((err) => {
  console.error('Fatal error:', err);
  process.exit(1);
});
|
||||
Reference in New Issue
Block a user