Add admin page overhaul and compliance schema drift check specs, compliance upload improvements, drift checker helper

2026-04-20 20:12:12 +00:00
parent 6082721452
commit 043c85cc69
20 changed files with 56814 additions and 59 deletions
--- a/backend/cve_database.db.backupNVD
+++ b/backend/cve_database.db.backupNVD
--- a/backend/helpers/driftChecker.js
+++ b/backend/helpers/driftChecker.js
@@ -0,0 +1,332 @@
+// Drift Checker — compares xlsx schema against parser config to detect structural drift
+// Returns categorised findings: breaking, silent_miss, cosmetic
+
+const fs = require('fs');
+const path = require('path');
+
+/**
+ * Load and validate the compliance parser configuration file.
+ *
+ * The file is read synchronously, parsed as JSON, and checked for the three
+ * top-level keys the drift checker relies on. The entries inside those keys
+ * are not validated here.
+ *
+ * @param {string} configPath — absolute or relative path to compliance_config.json
+ * @returns {object} parsed config with metric_categories, core_cols, skip_sheets
+ * @throws {Error} descriptive error if the file is missing or unreadable, is not
+ *   valid JSON, is not a JSON object at the top level, or lacks a required key
+ */
+function loadConfig(configPath) {
+  let raw;
+  try {
+    raw = fs.readFileSync(configPath, 'utf8');
+  } catch (err) {
+    if (err.code === 'ENOENT') {
+      throw new Error(`Configuration file not found: ${configPath}`);
+    }
+    throw new Error(`Failed to read configuration file: ${err.message}`);
+  }
+
+  let config;
+  try {
+    config = JSON.parse(raw);
+  } catch (err) {
+    throw new Error(`Configuration file contains invalid JSON: ${err.message}`);
+  }
+
+  // JSON.parse also accepts `null`, numbers, strings and arrays as documents.
+  // Reject them up front so the key checks below cannot fail with a raw
+  // TypeError (reading a property of null) instead of a descriptive error.
+  if (config === null || typeof config !== 'object' || Array.isArray(config)) {
+    throw new Error('Configuration file must contain a JSON object at the top level');
+  }
+
+  if (!config.metric_categories || typeof config.metric_categories !== 'object' || Array.isArray(config.metric_categories)) {
+    throw new Error('Configuration file is missing required key "metric_categories" (must be an object)');
+  }
+  if (!Array.isArray(config.core_cols)) {
+    throw new Error('Configuration file is missing required key "core_cols" (must be an array)');
+  }
+  if (!Array.isArray(config.skip_sheets)) {
+    throw new Error('Configuration file is missing required key "skip_sheets" (must be an array)');
+  }
+
+  return config;
+}
+
+/**
+ * Compare an xlsx schema against the parser config and produce a drift report.
+ *
+ * Every sheet whose name is not in `skip_sheets` is treated as a detail sheet.
+ * The sheet named "Summary" supplies the list of tracked metric values.
+ * Each finding has the shape `{ severity, message, value, sheet }`.
+ *
+ * @param {object} schema — output of extract_xlsx_schema.py: { sheets: [{ name, columns, metric_values? }] }
+ * @param {object} config — parsed compliance_config.json: { metric_categories, core_cols, skip_sheets }
+ * @returns {{ breaking: Array, silent_miss: Array, cosmetic: Array }}
+ * @throws {Error} if `schema.sheets` is not an array
+ */
+function compareSchemaToDrift(schema, config) {
+  if (!schema || !Array.isArray(schema.sheets)) {
+    throw new Error('Schema is missing required key "sheets" (must be an array)');
+  }
+
+  const breaking = [];
+  const silent_miss = [];
+  const cosmetic = [];
+
+  const metricCategoryKeys = new Set(Object.keys(config.metric_categories));
+  const coreCols = new Set(config.core_cols);
+  const skipSheets = new Set(config.skip_sheets);
+
+  // Build lookup of xlsx sheet names and find the Summary sheet
+  const xlsxSheetNames = new Set();
+  let summarySheet = null;
+
+  for (const sheet of schema.sheets) {
+    xlsxSheetNames.add(sheet.name);
+    if (sheet.name === 'Summary') {
+      summarySheet = sheet;
+    }
+  }
+
+  // Identify detail sheets: present in xlsx AND not in skip_sheets
+  const detailSheets = schema.sheets.filter(s => !skipSheets.has(s.name));
+
+  // Build set of metric values from the Summary sheet (used by multiple rules)
+  const summaryMetrics = new Set(
+    (summarySheet && Array.isArray(summarySheet.metric_values)) ? summarySheet.metric_values : []
+  );
+
+  // --- Breaking rules ---
+
+  // Missing core column: a detail sheet is missing a column from core_cols.
+  // Collect per-column stats first, then classify: if a column is missing from
+  // ALL detail sheets it's breaking. If missing from only some (e.g. 5.8.1 uses
+  // CMDB columns), it's cosmetic — the parser handles it via extra_json.
+  //
+  // A Map is used (not a plain object) so a column named like an inherited
+  // Object property ("constructor", "toString") cannot collide with it, and the
+  // de-duplicated coreCols set is iterated so a column listed twice in the
+  // config cannot count the same sheet twice.
+  const missingByCoreCol = new Map();  // col -> [sheet names missing it]
+  for (const sheet of detailSheets) {
+    const sheetCols = new Set(sheet.columns || []);
+    for (const coreCol of coreCols) {
+      if (sheetCols.has(coreCol)) continue;
+      if (!missingByCoreCol.has(coreCol)) missingByCoreCol.set(coreCol, []);
+      missingByCoreCol.get(coreCol).push(sheet.name);
+    }
+  }
+
+  for (const [coreCol, missingSheets] of missingByCoreCol) {
+    if (detailSheets.length > 0 && missingSheets.length >= detailSheets.length) {
+      // Missing from ALL detail sheets — genuinely breaking
+      breaking.push({
+        severity: 'breaking',
+        message: `Core column "${coreCol}" is missing from all ${detailSheets.length} detail sheet(s)`,
+        value: coreCol,
+        sheet: null
+      });
+    } else {
+      // Missing from some sheets — structural difference, not drift
+      cosmetic.push({
+        severity: 'cosmetic',
+        message: `Core column "${coreCol}" is missing from ${missingSheets.length} of ${detailSheets.length} detail sheet(s): ${missingSheets.join(', ')}`,
+        value: coreCol,
+        sheet: null
+      });
+    }
+  }
+
+  // Missing detail sheet: a sheet in metric_categories (not in skip_sheets) is absent from xlsx.
+  // If the metric still appears in the Summary's metric_values, it's tracked but has zero
+  // violations this week — downgrade to cosmetic instead of breaking.
+  for (const metricKey of metricCategoryKeys) {
+    if (skipSheets.has(metricKey) || xlsxSheetNames.has(metricKey)) continue;
+
+    if (summaryMetrics.has(metricKey)) {
+      cosmetic.push({
+        severity: 'cosmetic',
+        message: `Metric "${metricKey}" has no detail sheet this week — still tracked in Summary (zero violations)`,
+        value: metricKey,
+        sheet: null
+      });
+    } else {
+      breaking.push({
+        severity: 'breaking',
+        message: `Expected detail sheet "${metricKey}" (metric category) is missing from the workbook`,
+        value: metricKey,
+        sheet: null
+      });
+    }
+  }
+
+  // --- Silent-miss rules ---
+
+  // Unknown metric value: a metric value in Summary is not a key in metric_categories.
+  // Iterating the Set reports each unknown metric once even if Summary repeats it.
+  for (const metricVal of summaryMetrics) {
+    if (!metricCategoryKeys.has(metricVal)) {
+      silent_miss.push({
+        severity: 'silent_miss',
+        message: `Unknown metric "${metricVal}" in Summary — not in metric_categories`,
+        value: metricVal,
+        sheet: 'Summary'
+      });
+    }
+  }
+
+  // Unknown sheet: an xlsx sheet not in skip_sheets and not in metric_categories
+  for (const sheet of detailSheets) {
+    if (!metricCategoryKeys.has(sheet.name)) {
+      silent_miss.push({
+        severity: 'silent_miss',
+        message: `Unknown sheet "${sheet.name}" — not in skip_sheets or metric_categories`,
+        value: sheet.name,
+        sheet: sheet.name
+      });
+    }
+  }
+
+  // --- Cosmetic rules ---
+
+  // New column in detail sheet: a detail sheet has columns not in core_cols
+  for (const sheet of detailSheets) {
+    for (const col of (sheet.columns || [])) {
+      if (!coreCols.has(col)) {
+        cosmetic.push({
+          severity: 'cosmetic',
+          message: `New column "${col}" in sheet "${sheet.name}" — will be captured in extra_json`,
+          value: col,
+          sheet: sheet.name
+        });
+      }
+    }
+  }
+
+  // Stale metric category: a key in metric_categories not in Summary metric values
+  for (const metricKey of metricCategoryKeys) {
+    if (!summaryMetrics.has(metricKey)) {
+      cosmetic.push({
+        severity: 'cosmetic',
+        message: `Stale metric category "${metricKey}" — not found in Summary sheet metric values`,
+        value: metricKey,
+        sheet: null
+      });
+    }
+  }
+
+  return { breaking, silent_miss, cosmetic };
+}
+
+/**
+ * Reconcile the parser config to resolve breaking drift findings.
+ *
+ * Findings are recognised by their message text, so the matching below must
+ * stay in step with the messages produced by compareSchemaToDrift().
+ *
+ * Breaking — "missing detail sheet":
+ *   A metric_categories key has no matching xlsx sheet. But if the metric
+ *   still appears in the Summary sheet's metric_values, it's a legitimate
+ *   tracked metric that simply doesn't have violations this week — keep it.
+ *   Only remove metrics absent from BOTH the xlsx sheets AND the Summary.
+ *
+ * Breaking — "missing core column":
+ *   Only remove a core_cols entry if the column is missing from ALL detail
+ *   sheets (some sheets like 5.8.1 have a completely different column
+ *   structure and shouldn't cause removal). Two message formats are accepted:
+ *   the aggregated 'Core column "X" is missing from all N detail sheet(s)'
+ *   finding, and per-sheet findings containing 'is missing core column',
+ *   which are counted against the number of detail sheets in the schema.
+ *   Without a schema the detail sheet count is 0 and the column is kept.
+ *
+ * Silent-miss — "unknown metric":
+ *   A metric value in the Summary is not in metric_categories. Add it as 'Other'.
+ *
+ * Silent-miss — "unknown sheet":
+ *   Left as a warning. Auto-adding unknown sheets creates a reconcile loop.
+ *
+ * The config file is rewritten only when at least one change is not 'kept'.
+ *
+ * @param {string} configPath — path to compliance_config.json
+ * @param {object} driftReport — the drift report from compareSchemaToDrift()
+ * @param {object} [schema] — optional xlsx schema (with sheets[].name and Summary metric_values)
+ * @returns {{ changes: Array<{ action: string, key: string, value: string, detail: string }>, config: object }}
+ * @throws {Error} if the config cannot be loaded or the updated file cannot be written
+ */
+function reconcileConfig(configPath, driftReport, schema) {
+  const config = loadConfig(configPath);
+  const changes = [];
+  const report = driftReport || {};
+
+  // Own-property test: `in` would also match inherited names such as
+  // "constructor" or "toString" and mis-handle metrics with those names.
+  const hasMetric = (key) => Object.prototype.hasOwnProperty.call(config.metric_categories, key);
+
+  // The report may originate from an HTTP request body, so entries that are
+  // not well-formed findings are skipped instead of throwing on them.
+  const usableFindings = (list) => (Array.isArray(list) ? list : []).filter(
+    (f) => f && typeof f.message === 'string' && typeof f.value === 'string'
+  );
+
+  const sheets = (schema && Array.isArray(schema.sheets)) ? schema.sheets : [];
+
+  // Build a set of metric values from the Summary sheet (if schema provided)
+  const summarySheet = sheets.find((s) => s && s.name === 'Summary');
+  const summaryMetrics = new Set(
+    (summarySheet && Array.isArray(summarySheet.metric_values)) ? summarySheet.metric_values : []
+  );
+
+  // Count how many detail sheets exist in the xlsx (excluding skip_sheets)
+  const skipSheets = new Set(config.skip_sheets);
+  const detailSheetCount = sheets.filter((s) => s && !skipSheets.has(s.name)).length;
+
+  // --- Resolve breaking findings ---
+
+  const breakingFindings = usableFindings(report.breaking);
+
+  for (const finding of breakingFindings) {
+    const { message, value } = finding;
+
+    // Missing detail sheet: remove from metric_categories ONLY if the metric
+    // is also absent from the Summary's metric_values. If it's in the Summary,
+    // it's still a tracked metric — the sheet just has zero violations this week.
+    if (message.includes('is missing from the workbook') && hasMetric(value)) {
+      if (summaryMetrics.has(value)) {
+        // Metric is in the Summary — keep it, just note it's sheet-less this week
+        changes.push({
+          action: 'kept',
+          key: 'metric_categories',
+          value,
+          detail: `Kept metric "${value}" — no detail sheet this week but still tracked in Summary`
+        });
+      } else {
+        const oldCategory = config.metric_categories[value];
+        delete config.metric_categories[value];
+        changes.push({
+          action: 'removed',
+          key: 'metric_categories',
+          value,
+          detail: `Removed stale metric category "${value}" (was "${oldCategory}") — absent from both workbook sheets and Summary`
+        });
+      }
+    }
+
+    // Missing core column. The aggregated finding already states that the
+    // column is absent from every detail sheet; per-sheet findings are counted.
+    const isAggregatedCoreCol = message.startsWith('Core column "') && message.includes('is missing from all');
+    const isPerSheetCoreCol = message.includes('is missing core column');
+    if (!(isAggregatedCoreCol || isPerSheetCoreCol) || !config.core_cols.includes(value)) {
+      continue;
+    }
+    // Decide once per column, even if several findings mention it.
+    if (changes.some((c) => c.key === 'core_cols' && c.value === value)) {
+      continue;
+    }
+
+    const missingFromCount = isAggregatedCoreCol
+      ? detailSheetCount
+      : breakingFindings.filter(
+          (f) => f.message.includes('is missing core column') && f.value === value
+        ).length;
+
+    if (detailSheetCount > 0 && missingFromCount >= detailSheetCount) {
+      // Missing from ALL detail sheets — safe to remove
+      config.core_cols = config.core_cols.filter((c) => c !== value);
+      changes.push({
+        action: 'removed',
+        key: 'core_cols',
+        value,
+        detail: `Removed core column "${value}" — missing from all ${detailSheetCount} detail sheet(s)`
+      });
+    } else {
+      // Missing from some sheets but present in others (or no schema to check) — keep it
+      changes.push({
+        action: 'kept',
+        key: 'core_cols',
+        value,
+        detail: `Kept core column "${value}" — missing from ${missingFromCount} of ${detailSheetCount} detail sheet(s)`
+      });
+    }
+  }
+
+  // --- Resolve silent-miss findings ---
+
+  for (const finding of usableFindings(report.silent_miss)) {
+    // Unknown metric in Summary: add to metric_categories as 'Other'.
+    // "__proto__" is skipped: assigning a string to it is silently ignored,
+    // so it could never actually be stored as a category.
+    if (
+      finding.message.includes('not in metric_categories') &&
+      finding.value !== '__proto__' &&
+      !hasMetric(finding.value)
+    ) {
+      config.metric_categories[finding.value] = 'Other';
+      changes.push({
+        action: 'added',
+        key: 'metric_categories',
+        value: finding.value,
+        detail: `Added new metric "${finding.value}" to metric_categories as "Other"`
+      });
+    }
+
+    // Unknown sheet: left as a warning — auto-adding creates a reconcile loop.
+  }
+
+  // Only write if there were actual config mutations (not just 'kept' entries)
+  const hasMutations = changes.some((c) => c.action !== 'kept');
+  if (hasMutations) {
+    fs.writeFileSync(configPath, JSON.stringify(config, null, 2) + '\n', 'utf8');
+  }
+
+  return { changes, config };
+}
+
+module.exports = { compareSchemaToDrift, loadConfig, reconcileConfig };
--- a/backend/routes/compliance.js
+++ b/backend/routes/compliance.js
@@ -2,25 +2,35 @@
 // Handles xlsx upload/parse, non-compliant item history, and notes.
 //
 // Endpoints:
-//   POST /preview          — parse xlsx, compute diff vs DB, return summary (no DB write)
-//   POST /commit           — commit a previewed upload to DB
-//   GET  /uploads          — list all uploads
-//   GET  /summary          — metric health cards for a team (from latest upload)
-//   GET  /items            — non-compliant devices grouped by hostname (?team=X&status=active)
-//   GET  /items/:hostname  — detail panel: all metrics + notes + upload history for a device
-//   POST /notes            — add a note to one or more (hostname, metric_id) pairs
+//   POST /preview              — parse xlsx, run drift check, compute diff (no DB write)
+//   POST /reconcile-config     — patch compliance_config.json to resolve drift findings
+//   POST /commit               — commit a previewed upload to DB
+//   GET  /uploads              — list all uploads
+//   POST /rollback/:uploadId   — roll back the most recent upload (Admin only)
+//   GET  /summary              — metric health cards for a team (from latest upload)
+//   GET  /items                — non-compliant devices grouped by hostname (?team=X&status=active)
+//   GET  /items/:hostname      — detail panel: all metrics + notes + upload history for a device
+//   POST /notes                — add a note to one or more (hostname, metric_id) pairs
 //   GET  /notes/:hostname/:metricId — notes for a specific device+metric
+//   GET  /trends               — per-upload totals + per-team counts for time-series charts
+//   GET  /mttr                 — mean time to resolution per team
+//   GET  /top-recurring        — chronic compliance gaps sorted by seen_count
+//   GET  /category-trend       — active counts per category per upload for stacked area chart

 const express  = require('express');
 const path     = require('path');
 const fs       = require('fs');
 const crypto   = require('crypto');
 const { spawn } = require('child_process');
+const { loadConfig, compareSchemaToDrift, reconcileConfig } = require('../helpers/driftChecker');
+const logAudit = require('../helpers/auditLog');

-const PARSER_SCRIPT = path.join(__dirname, '../scripts/parse_compliance_xlsx.py');
-const PYTHON_BIN    = process.env.PYTHON_BIN || 'python3';
-const TEMP_DIR      = path.join(process.cwd(), 'uploads', 'temp');
-const ALLOWED_TEAMS = new Set(['STEAM', 'ACCESS-ENG', 'ACCESS-OPS', 'INTELDEV']);
+const PARSER_SCRIPT  = path.join(__dirname, '../scripts/parse_compliance_xlsx.py');
+const SCHEMA_SCRIPT  = path.join(__dirname, '../scripts/extract_xlsx_schema.py');
+const CONFIG_PATH    = path.join(__dirname, '..', 'scripts', 'compliance_config.json');
+const PYTHON_BIN     = process.env.PYTHON_BIN || 'python3';
+const TEMP_DIR       = path.join(process.cwd(), 'uploads', 'temp');
+const ALLOWED_TEAMS  = new Set(['STEAM', 'ACCESS-ENG', 'ACCESS-OPS', 'INTELDEV']);

 // ---------------------------------------------------------------------------
 // DB helpers
@@ -63,6 +73,25 @@ function parseXlsx(filePath) {
    });
 }

+// ---------------------------------------------------------------------------
+// Run Python schema extractor, return xlsx schema object
+//
+// Spawns PYTHON_BIN with SCHEMA_SCRIPT and the xlsx path, collects stdout and
+// resolves with the parsed JSON. Rejects when the process cannot be spawned,
+// exits non-zero (stderr text is used as the message when there is any), or
+// prints something that is not valid JSON.
+// ---------------------------------------------------------------------------
+function extractXlsxSchema(filePath) {
+    return new Promise((resolve, reject) => {
+        const py = spawn(PYTHON_BIN, [SCHEMA_SCRIPT, filePath]);
+        // Decode at the stream level so a multi-byte UTF-8 character that is
+        // split across two chunks is not corrupted by per-chunk coercion of
+        // Buffers to strings.
+        py.stdout.setEncoding('utf8');
+        py.stderr.setEncoding('utf8');
+        let out = '';
+        let err = '';
+        py.stdout.on('data', d => { out += d; });
+        py.stderr.on('data', d => { err += d; });
+        py.on('close', code => {
+            if (code !== 0) return reject(new Error(err || `Schema extractor exited with code ${code}`));
+            try { resolve(JSON.parse(out)); }
+            catch (e) { reject(new Error('Schema extractor returned invalid JSON')); }
+        });
+        // Emitted when the process could not be spawned at all (e.g. missing binary).
+        py.on('error', reject);
+    });
+}
+
 // ---------------------------------------------------------------------------
 // Validate that a temp file path is safely within uploads/temp/
 // ---------------------------------------------------------------------------
@@ -228,6 +257,15 @@ function createComplianceRouter(db, upload, requireAuth, requireGroup) {
    // POST /preview
    // Parse the uploaded xlsx, compute diff, save parsed data to a temp JSON.
    // Returns diff counts + tempFile path for the commit step.
+    //
+    // Body: multipart/form-data with `file` field (xlsx)
+    // Response: {
+    //   drift: { breaking: [], silent_miss: [], cosmetic: [] } | null,
+    //   drift_error: string | null,
+    //   schema: object | null,   // raw schema extractor output; null if the extractor failed to run
+    //   diff: { new_count, recurring_count, resolved_count },
+    //   tempFile: string, filename: string,
+    //   report_date: string, total_items: number
+    // }
    // -----------------------------------------------------------------------
    router.post('/preview', requireGroup('Admin', 'Standard_User'), (req, res) => {
        upload.single('file')(req, res, async (uploadErr) => {
@@ -243,6 +281,31 @@ function createComplianceRouter(db, upload, requireAuth, requireGroup) {
            }

            try {
+                // --- Drift check: load config, extract schema, compare ---
+                let drift = null;
+                let drift_error = null;
+
+                let config;
+                try {
+                    config = loadConfig(CONFIG_PATH);
+                } catch (configErr) {
+                    fs.unlink(req.file.path, () => {});
+                    return res.status(500).json({ error: 'Configuration file could not be loaded: ' + configErr.message });
+                }
+
+                let xlsxSchema = null;
+                try {
+                    xlsxSchema = await extractXlsxSchema(req.file.path);
+                    if (xlsxSchema.error) {
+                        throw new Error(xlsxSchema.error);
+                    }
+                    drift = compareSchemaToDrift(xlsxSchema, config);
+                } catch (driftErr) {
+                    drift = null;
+                    drift_error = driftErr.message || 'Drift check failed';
+                }
+
+                // --- Existing parse flow ---
                const parsed = await parseXlsx(req.file.path);

                if (parsed.error) {
@@ -268,6 +331,9 @@ function createComplianceRouter(db, upload, requireAuth, requireGroup) {
                fs.unlink(req.file.path, () => {});

                res.json({
+                    drift,
+                    drift_error,
+                    schema: xlsxSchema,
                    diff: {
                        new_count:       diff.newCount,
                        recurring_count: diff.recurringCount,
@@ -287,10 +353,63 @@ function createComplianceRouter(db, upload, requireAuth, requireGroup) {
        });
    });

+    // -----------------------------------------------------------------------
+    // POST /reconcile-config
+    // Admin-only. Patches compliance_config.json to resolve breaking and
+    // silent-miss drift findings and returns the list of changes made.
+    // The drift check is NOT re-run here; callers that need a fresh report
+    // must request one separately. Logs every change to the audit trail,
+    // including 'kept' entries where the config was left untouched.
+    //
+    // Body: { drift: { breaking: [...], silent_miss: [...] }, schema?: object }
+    //   schema is optional and is passed through to reconcileConfig.
+    // Response: { changes: [{ action, key, value, detail }], message: string }
+    // Errors: 400 when drift is missing, malformed, or has nothing to
+    //   reconcile; 500 when the config cannot be read or written.
+    // -----------------------------------------------------------------------
+    router.post('/reconcile-config', requireGroup('Admin'), async (req, res) => {
+        const { drift, schema } = req.body || {};
+
+        if (!drift || typeof drift !== 'object') {
+            return res.status(400).json({ error: 'drift report is required in request body' });
+        }
+
+        // The finding lists are iterated downstream, so anything that is present
+        // but not an array is a malformed request rather than a server error.
+        for (const key of ['breaking', 'silent_miss']) {
+            if (drift[key] != null && !Array.isArray(drift[key])) {
+                return res.status(400).json({ error: `drift.${key} must be an array` });
+            }
+        }
+
+        const hasFindings = (drift.breaking && drift.breaking.length > 0) ||
+                            (drift.silent_miss && drift.silent_miss.length > 0);
+        if (!hasFindings) {
+            return res.status(400).json({ error: 'No breaking or silent-miss findings to reconcile' });
+        }
+
+        try {
+            const { changes } = reconcileConfig(CONFIG_PATH, drift, schema || null);
+
+            if (changes.length === 0) {
+                return res.json({ changes: [], message: 'No changes needed' });
+            }
+
+            // Audit log each change
+            for (const change of changes) {
+                logAudit(db, {
+                    userId:     req.user.id,
+                    username:   req.user.username,
+                    action:     'compliance_config_reconcile',
+                    entityType: 'compliance_config',
+                    entityId:   change.value,
+                    details:    { action: change.action, key: change.key, detail: change.detail },
+                    ipAddress:  req.ip,
+                });
+            }
+
+            res.json({ changes, message: `Reconciled ${changes.length} config change(s)` });
+
+        } catch (err) {
+            console.error('[Compliance] Reconcile config error:', err.message);
+            res.status(500).json({ error: 'Failed to reconcile config: ' + err.message });
+        }
+    });
+
    // -----------------------------------------------------------------------
    // POST /commit
    // Commit a previewed upload to the DB.
-    // Body: { tempFile, filename, report_date }
+    //
+    // Body: { tempFile: string, filename: string, report_date: string }
+    // Response: { upload: { id, filename, report_date, uploaded_at,
+    //   new_count, resolved_count, recurring_count } }
    // -----------------------------------------------------------------------
    router.post('/commit', requireGroup('Admin', 'Standard_User'), async (req, res) => {
        const { tempFile, filename, report_date } = req.body;
@@ -341,6 +460,9 @@ function createComplianceRouter(db, upload, requireAuth, requireGroup) {
    // -----------------------------------------------------------------------
    // GET /uploads
    // List all uploads, most recent first.
+    //
+    // Response: { uploads: [{ id, filename, report_date, uploaded_at,
+    //   new_count, resolved_count, recurring_count }] }
    // -----------------------------------------------------------------------
    router.get('/uploads', async (req, res) => {
        try {
@@ -357,9 +479,133 @@ function createComplianceRouter(db, upload, requireAuth, requireGroup) {
        }
    });

+    // -----------------------------------------------------------------------
+    // POST /rollback/:uploadId
+    // Admin-only. Rolls back a specific upload. Only the most recent upload
+    // can be rolled back to avoid cascading data integrity issues.
+    //
+    // Params: uploadId — integer ID of the upload to roll back (digits only)
+    // Response: { message: string, rolled_back: { upload_id, filename,
+    //   report_date, items_deleted, items_reactivated } }
+    // Errors: 400 for a non-numeric ID or an upload that is not the latest,
+    //   404 for an unknown upload, 500 when a database statement fails.
+    //
+    // Reversal logic (run inside one transaction):
+    //   1. Delete items first seen in this upload (new items)
+    //   2. Re-activate items resolved by this upload
+    //   3. Revert recurring items: decrement seen_count, point upload_id
+    //      back to the previous upload
+    //   4. Delete the upload record
+    // -----------------------------------------------------------------------
+    router.post('/rollback/:uploadId', requireGroup('Admin'), async (req, res) => {
+        // parseInt alone would accept trailing garbage ("12abc" -> 12), so the
+        // raw parameter must consist of digits only before it is converted.
+        const rawUploadId = String(req.params.uploadId);
+        if (!/^\d+$/.test(rawUploadId)) {
+            return res.status(400).json({ error: 'Invalid upload ID' });
+        }
+        const uploadId = Number.parseInt(rawUploadId, 10);
+
+        try {
+            // Verify the upload exists
+            const uploadRow = await dbGet(db,
+                `SELECT id, filename, report_date, new_count, resolved_count, recurring_count
+                 FROM compliance_uploads WHERE id = ?`,
+                [uploadId]
+            );
+            if (!uploadRow) {
+                return res.status(404).json({ error: 'Upload not found' });
+            }
+
+            // Only allow rolling back the most recent upload
+            const latest = await dbGet(db,
+                `SELECT id FROM compliance_uploads ORDER BY id DESC LIMIT 1`
+            );
+            if (latest.id !== uploadId) {
+                return res.status(400).json({
+                    error: 'Only the most recent upload can be rolled back',
+                    latest_upload_id: latest.id
+                });
+            }
+
+            // Find the previous upload (to restore recurring items' upload_id)
+            const previousUpload = await dbGet(db,
+                `SELECT id FROM compliance_uploads WHERE id < ? ORDER BY id DESC LIMIT 1`,
+                [uploadId]
+            );
+
+            let deleteNew;
+            let reactivate;
+
+            await dbRun(db, 'BEGIN TRANSACTION');
+
+            try {
+                // 1. Delete items that were NEW in this upload
+                deleteNew = await dbRun(db,
+                    `DELETE FROM compliance_items WHERE first_seen_upload_id = ? AND upload_id = ?`,
+                    [uploadId, uploadId]
+                );
+
+                // 2. Re-activate items that were RESOLVED by this upload
+                reactivate = await dbRun(db,
+                    `UPDATE compliance_items
+                     SET status = 'active', resolved_upload_id = NULL
+                     WHERE resolved_upload_id = ?`,
+                    [uploadId]
+                );
+
+                // 3. Revert RECURRING items: decrement seen_count, restore upload_id
+                if (previousUpload) {
+                    await dbRun(db,
+                        `UPDATE compliance_items
+                         SET upload_id = ?, seen_count = MAX(seen_count - 1, 1)
+                         WHERE upload_id = ? AND first_seen_upload_id != ?`,
+                        [previousUpload.id, uploadId, uploadId]
+                    );
+                }
+
+                // 4. Delete the upload record
+                await dbRun(db, `DELETE FROM compliance_uploads WHERE id = ?`, [uploadId]);
+
+                await dbRun(db, 'COMMIT');
+            } catch (err) {
+                // Best effort: the original error is what gets reported.
+                await dbRun(db, 'ROLLBACK').catch(() => {});
+                throw err;
+            }
+
+            // The transaction is committed from here on. Audit logging and the
+            // response sit outside the transactional try/catch so that a failure
+            // in either can no longer issue a pointless ROLLBACK, and an audit
+            // failure is not reported to the client as a failed rollback.
+            try {
+                logAudit(db, {
+                    userId:     req.user.id,
+                    username:   req.user.username,
+                    action:     'compliance_upload_rollback',
+                    entityType: 'compliance_upload',
+                    entityId:   String(uploadId),
+                    details:    {
+                        filename:    uploadRow.filename,
+                        report_date: uploadRow.report_date,
+                        items_deleted:     deleteNew.changes,
+                        items_reactivated: reactivate.changes,
+                    },
+                    ipAddress:  req.ip,
+                });
+            } catch (auditErr) {
+                console.error('[Compliance] Rollback audit log error:', auditErr.message);
+            }
+
+            res.json({
+                message: `Rolled back upload "${uploadRow.filename}"`,
+                rolled_back: {
+                    upload_id:         uploadId,
+                    filename:          uploadRow.filename,
+                    report_date:       uploadRow.report_date,
+                    items_deleted:     deleteNew.changes,
+                    items_reactivated: reactivate.changes,
+                },
+            });
+
+        } catch (err) {
+            console.error('[Compliance] Rollback error:', err.message);
+            res.status(500).json({ error: 'Failed to rollback upload: ' + err.message });
+        }
+    });
+
    // -----------------------------------------------------------------------
    // GET /summary?team=STEAM
    // Return metric health rows for a team from the latest upload's summary_json.
+    //
+    // Query: team — optional, one of ALLOWED_TEAMS
+    // Response: { entries: [...], overall_scores: {}, upload: { id,
+    //   report_date, uploaded_at } | null }
    // -----------------------------------------------------------------------
    router.get('/summary', async (req, res) => {
        const team = req.query.team;
@@ -403,6 +649,12 @@ function createComplianceRouter(db, upload, requireAuth, requireGroup) {
    // -----------------------------------------------------------------------
    // GET /items?team=STEAM&status=active
    // Return non-compliant devices grouped by hostname.
+    //
+    // Query: team — required, one of ALLOWED_TEAMS
+    //        status — optional, 'active' (default) or 'resolved'
+    // Response: { devices: [{ hostname, ip_address, device_type, team,
+    //   status, failing_metrics, seen_count, first_seen, last_seen,
+    //   resolved_on, has_notes }], team, status }
    // -----------------------------------------------------------------------
    router.get('/items', async (req, res) => {
        const { team, status = 'active' } = req.query;
@@ -448,6 +700,12 @@ function createComplianceRouter(db, upload, requireAuth, requireGroup) {
    // -----------------------------------------------------------------------
    // GET /items/:hostname
    // Detail panel: all metric rows for this hostname + notes + upload history.
+    //
+    // Params: hostname — device hostname string
+    // Response: { hostname, ip_address, device_type, team,
+    //   metrics: [{ metric_id, metric_desc, category, status, seen_count,
+    //     extra, first_seen, last_seen, resolved_on, ... }],
+    //   notes: [{ id, metric_id, note, group_id, created_at, created_by }] }
    // -----------------------------------------------------------------------
    router.get('/items/:hostname', async (req, res) => {
        const hostname = req.params.hostname;
@@ -519,7 +777,11 @@ function createComplianceRouter(db, upload, requireAuth, requireGroup) {
    // -----------------------------------------------------------------------
    // POST /notes
    // Add a note to one or more (hostname, metric_id) pairs.
-    // Body: { hostname, metric_ids: [...], note }  — or legacy { hostname, metric_id, note }
+    //
+    // Body: { hostname: string, metric_ids: string[], note: string }
+    //   — or legacy: { hostname: string, metric_id: string, note: string }
+    // Response: { notes: [{ id, hostname, metric_id, note, group_id,
+    //   created_at, created_by }] }
    // -----------------------------------------------------------------------
    router.post('/notes', requireGroup('Admin', 'Standard_User'), async (req, res) => {
        const { hostname, metric_id, metric_ids, note } = req.body;
@@ -602,6 +864,10 @@ function createComplianceRouter(db, upload, requireAuth, requireGroup) {
    // -----------------------------------------------------------------------
    // GET /notes/:hostname/:metricId
    // Return all notes for a (hostname, metric_id) pair.
+    //
+    // Params: hostname — device hostname string
+    //         metricId — metric identifier string
+    // Response: { notes: [{ id, note, created_at, created_by }] }
    // -----------------------------------------------------------------------
    router.get('/notes/:hostname/:metricId', async (req, res) => {
        const { hostname, metricId } = req.params;
@@ -629,6 +895,10 @@ function createComplianceRouter(db, upload, requireAuth, requireGroup) {
    // GET /trends
    // Per-upload active totals + per-team counts for time-series charts.
    // Returns rows ordered ascending by report_date.
+    //
+    // Response: { trends: [{ report_date, new_count, recurring_count,
+    //   resolved_count, total_active, STEAM, ACCESS-ENG, ACCESS-OPS,
+    //   INTELDEV }] }
    // -----------------------------------------------------------------------
    router.get('/trends', async (req, res) => {
        try {
@@ -681,6 +951,8 @@ function createComplianceRouter(db, upload, requireAuth, requireGroup) {
    // -----------------------------------------------------------------------
    // GET /mttr
    // Mean time to resolution (calendar days) per team, for resolved items.
+    //
+    // Response: { mttr: [{ team, avg_days, resolved_count }] }
    // -----------------------------------------------------------------------
    router.get('/mttr', async (req, res) => {
        try {
@@ -709,6 +981,9 @@ function createComplianceRouter(db, upload, requireAuth, requireGroup) {
    // GET /top-recurring
    // Active findings grouped by team + metric_id, sorted by seen_count desc.
    // Identifies chronic compliance gaps that keep reappearing.
+    //
+    // Response: { items: [{ team, metric_id, metric_desc, seen_count,
+    //   host_count }] }  — limited to top 20
    // -----------------------------------------------------------------------
    router.get('/top-recurring', async (req, res) => {
        try {
@@ -730,6 +1005,8 @@ function createComplianceRouter(db, upload, requireAuth, requireGroup) {
    // -----------------------------------------------------------------------
    // GET /category-trend
    // Active item counts per category per upload, for stacked area chart.
+    //
+    // Response: { categoryTrend: [{ report_date, category, count }] }
    // -----------------------------------------------------------------------
    router.get('/category-trend', async (req, res) => {
        try {
--- a/backend/scripts/pycache/extract_xlsx_schema.cpython-312.pyc
+++ b/backend/scripts/pycache/extract_xlsx_schema.cpython-312.pyc
--- a/backend/scripts/compliance_config.json
+++ b/backend/scripts/compliance_config.json
@@ -0,0 +1,44 @@
+{
+  "metric_categories": {
+    "1.1.1": "Logging & Monitoring",
+    "1.1.3": "Logging & Monitoring",
+    "1.4.1": "Logging & Monitoring",
+    "2.3.4i": "Vulnerability Management",
+    "2.3.6i": "Vulnerability Management",
+    "2.3.8i": "Vulnerability Management",
+    "5.2.4": "Access & MFA",
+    "5.2.5": "Access & MFA",
+    "5.2.6": "Access & MFA",
+    "5.2.7": "Access & MFA",
+    "5.2.8": "Access & MFA",
+    "5.3.4": "Endpoint Protection",
+    "5.5.4i": "Vulnerability Management",
+    "5.5.5": "Decommissioned Assets",
+    "5.8.1": "Application Security",
+    "7.1.1": "Logging & Monitoring",
+    "7.1.4": "Logging & Monitoring",
+    "7.6.13": "Disaster Recovery",
+    "7.6.16": "Disaster Recovery",
+    "Missing_AppID": "Asset Data Quality",
+    "Missing_DF": "Asset Data Quality",
+    "Missing_OS": "Asset Data Quality",
+    "5.5.2": "End-of-Life OS"
+  },
+  "core_cols": [
+    "Preferred - Hostname",
+    "GRANITE - IPv4_Address",
+    "GRANITE - Type",
+    "Team",
+    "Compliant",
+    "Source_Network",
+    "Vertical",
+    "GRANITE - Equip_Inst_ID",
+    "GRANITE - RESPONSIBLE_TEAM"
+  ],
+  "skip_sheets": [
+    "Summary",
+    "CMDB_9box",
+    "Vulns",
+    "Aging Dashboard"
+  ]
+}
--- a/backend/scripts/extract_xlsx_schema.py
+++ b/backend/scripts/extract_xlsx_schema.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+"""
+Extract the structural schema of a compliance xlsx file as JSON.
+Usage: python3 extract_xlsx_schema.py <path_to_xlsx>
+
+Output:
+{
+  "sheets": [
+    {
+      "name": "Summary",
+      "columns": ["Metric", "Non-Compliant", "..."],
+      "metric_values": ["2.3.4i", "5.2.4", "..."]
+    },
+    {
+      "name": "2.3.4i",
+      "columns": ["Preferred - Hostname", "GRANITE - IPv4_Address", "..."]
+    }
+  ]
+}
+
+- Uses openpyxl in read-only mode.
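+- Extracts sheet names, first-row column headers per sheet, and unique metric
+  values from the Summary sheet (header at row 4, data from row 5 onward).
+  Note: "columns" is read from row 1 for every sheet, Summary included, so for
+  Summary it lists the row-1 cells rather than the row-4 header that holds
+  "Metric".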
+- On error, returns { "error": "..." } on stdout and exits with non-zero code.
+
+Dependencies: openpyxl (already in requirements.txt)
+"""
+import sys
+import json
+from openpyxl import load_workbook
+
+
+def main():
+    if len(sys.argv) < 2:
+        print(json.dumps({"error": "No file path provided"}))
+        sys.exit(1)
+
+    filepath = sys.argv[1]
+
+    try:
+        wb = load_workbook(filepath, read_only=True, data_only=True)
+    except Exception as e:
+        print(json.dumps({"error": f"Cannot open file: {str(e)}"}))
+        sys.exit(1)
+
+    if not wb.sheetnames:
+        print(json.dumps({"error": "Workbook contains no sheets"}))
+        wb.close()
+        sys.exit(1)
+
+    sheets = []
+    for sheet_name in wb.sheetnames:
+        ws = wb[sheet_name]
+
+        # Extract first-row column headers
+        rows = list(ws.iter_rows(max_row=1, values_only=True))
+        columns = [str(c).strip() for c in rows[0] if c is not None] if rows else []
+
+        entry = {
+            "name": sheet_name,
+            "columns": columns,
+        }
+
+        # Extract metric values from the Summary sheet
+        # Summary has header at row 4, data from row 5 onward
+        if sheet_name == "Summary":
+            metric_values = []
+            header_rows = list(ws.iter_rows(min_row=4, max_row=4, values_only=True))
+            if header_rows:
+                summary_cols = [str(c).strip() if c else "" for c in header_rows[0]]
+                metric_idx = None
+                for i, col in enumerate(summary_cols):
+                    if col == "Metric":
+                        metric_idx = i
+                        break
+                if metric_idx is not None:
+                    for row in ws.iter_rows(min_row=5, values_only=True):
+                        if row[metric_idx] is not None:
+                            val = str(row[metric_idx]).strip()
+                            if val and val != "Metric":
+                                metric_values.append(val)
+            entry["metric_values"] = sorted(set(metric_values))
+
+        sheets.append(entry)
+
+    wb.close()
+    print(json.dumps({"sheets": sheets}))
+
+
+# Run the extractor only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
--- a/backend/scripts/parse_compliance_xlsx.py
+++ b/backend/scripts/parse_compliance_xlsx.py
@@ -12,45 +12,35 @@ Output:
 }
 """
 import sys
+import os
 import json
 import re
 import pandas as pd
 from pathlib import Path

-METRIC_CATEGORIES = {
-    '2.3.4i':  'Vulnerability Management',
-    '2.3.6i':  'Vulnerability Management',
-    '2.3.8i':  'Vulnerability Management',
-    '5.2.4':   'Access & MFA',
-    '5.2.5':   'Access & MFA',
-    '5.2.6':   'Access & MFA',
-    '5.3.4':   'Endpoint Protection',
-    '5.5.2':   'End-of-Life OS',
-    '5.5.4i':  'Vulnerability Management',
-    '5.5.5':   'Decommissioned Assets',
-    '5.8.1':   'Application Security',
-    '7.1.1':   'Logging & Monitoring',
-    '7.6.13':  'Disaster Recovery',
-    '7.6.16':  'Disaster Recovery',
-    '1.1.1':   'Logging & Monitoring',
-    '1.1.3':   'Logging & Monitoring',
-    '1.4.1':   'Logging & Monitoring',
-    '5.2.7':   'Access & MFA',
-    '5.2.8':   'Access & MFA',
-    '7.1.4':   'Logging & Monitoring',
-    'Missing_AppID': 'Asset Data Quality',
-    'Missing_DF':    'Asset Data Quality',
-    'Missing_OS':    'Asset Data Quality',
-}

-# Columns that go into the main item fields — everything else becomes extra_json
-CORE_COLS = {
-    'Preferred - Hostname', 'GRANITE - IPv4_Address', 'GRANITE - Type',
-    'Team', 'Compliant', 'Source_Network', 'Vertical',
-    'GRANITE - Equip_Inst_ID', 'GRANITE - RESPONSIBLE_TEAM',
-}
+def load_config():
+    """Load parser configuration from compliance_config.json."""
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    config_path = os.path.join(script_dir, 'compliance_config.json')

-SKIP_SHEETS = {'Summary', 'CMDB_9box', 'Vulns', 'Aging Dashboard'}
+    try:
+        with open(config_path, 'r') as f:
+            config = json.load(f)
+    except FileNotFoundError:
+        print(f"Error: Configuration file not found: {config_path}", file=sys.stderr)
+        sys.exit(1)
+    except json.JSONDecodeError as e:
+        print(f"Error: Invalid JSON in configuration file {config_path}: {e}", file=sys.stderr)
+        sys.exit(1)
+
+    return config
+
+
+# Parser settings are read once at import time; load_config() exits the
+# process if compliance_config.json is missing or is not valid JSON.
+_config = load_config()
+# Metric id -> category label.
+METRIC_CATEGORIES = _config['metric_categories']
+# Columns that go into the main item fields — everything else becomes extra_json
+CORE_COLS = set(_config['core_cols'])
+# Sheet names from the config's "skip_sheets" list (previously hard-coded here);
+# presumably excluded from per-sheet parsing — usage is outside this view.
+SKIP_SHEETS = set(_config['skip_sheets'])


 def safe_str(val):