// Drift Checker — compares xlsx schema against parser config to detect structural drift
// Returns categorised findings: breaking, silent_miss, cosmetic

const fs = require('fs');
const path = require('path');

// Name of the workbook sheet whose metric_values list the tracked metrics.
const SUMMARY_SHEET_NAME = 'Summary';

// Message fragments that reconcileConfig() matches on. They are defined once
// so the text emitted by compareSchemaToDrift() and the text matched by
// reconcileConfig() cannot drift apart.
const MISSING_SHEET_MARKER = 'is missing from the workbook';
const UNKNOWN_METRIC_MARKER = 'not in metric_categories';
// Older per-sheet wording of the core-column finding; still recognised so
// drift reports written in that format keep working.
const LEGACY_MISSING_CORE_COL_MARKER = 'is missing core column';

/**
 * Leading part of the breaking "core column missing everywhere" message.
 * compareSchemaToDrift() appends the sheet count; reconcileConfig() uses the
 * same prefix to recognise the finding.
 * @param {string} column — core column name
 * @returns {string} message prefix
 */
function coreColumnMissingEverywherePrefix(column) {
  return `Core column "${column}" is missing from all `;
}

/**
 * Own-property test that ignores inherited members such as "constructor" or
 * "toString", which the `in` operator would report as present.
 * @param {object} obj
 * @param {string} key
 * @returns {boolean}
 */
function hasOwn(obj, key) {
  return Object.prototype.hasOwnProperty.call(obj, key);
}

/**
 * @param {*} value
 * @returns {boolean} true for non-null, non-array objects
 */
function isPlainObject(value) {
  return value !== null && typeof value === 'object' && !Array.isArray(value);
}

/**
 * Build one drift finding in the shape consumers of the report expect.
 * @param {string} severity — 'breaking' | 'silent_miss' | 'cosmetic'
 * @param {string} message — human-readable description
 * @param {string} value — the column / metric / sheet the finding is about
 * @param {?string} sheet — sheet the finding belongs to, or null
 * @returns {{ severity: string, message: string, value: string, sheet: ?string }}
 */
function makeFinding(severity, message, value, sheet) {
  return { severity, message, value, sheet };
}

/**
 * Load and validate the compliance parser configuration file.
 * @param {string} configPath — absolute or relative path to compliance_config.json
 * @returns {object} parsed config with metric_categories, core_cols, skip_sheets
 * @throws {Error} descriptive error if file missing, invalid JSON, or missing required keys
 */
function loadConfig(configPath) {
  let raw;
  try {
    raw = fs.readFileSync(configPath, 'utf8');
  } catch (err) {
    if (err.code === 'ENOENT') {
      throw new Error(`Configuration file not found: ${configPath}`, { cause: err });
    }
    throw new Error(`Failed to read configuration file: ${err.message}`, { cause: err });
  }

  let config;
  try {
    config = JSON.parse(raw);
  } catch (err) {
    throw new Error(`Configuration file contains invalid JSON: ${err.message}`, { cause: err });
  }

  // A root of null / number / string / array is valid JSON, but reading keys
  // off it would either throw a raw TypeError or give a misleading message.
  if (!isPlainObject(config)) {
    throw new Error('Configuration file must contain a JSON object at the top level');
  }
  if (!isPlainObject(config.metric_categories)) {
    throw new Error('Configuration file is missing required key "metric_categories" (must be an object)');
  }
  if (!Array.isArray(config.core_cols)) {
    throw new Error('Configuration file is missing required key "core_cols" (must be an array)');
  }
  if (!Array.isArray(config.skip_sheets)) {
    throw new Error('Configuration file is missing required key "skip_sheets" (must be an array)');
  }

  return config;
}

/**
 * Compare an xlsx schema against the parser config and produce a drift report.
 * @param {object} schema — output of extract_xlsx_schema.py: { sheets: [{ name, columns, metric_values? }] }
 * @param {object} config — parsed compliance_config.json: { metric_categories, core_cols, skip_sheets }
 * @returns {{ breaking: Array, silent_miss: Array, cosmetic: Array }}
 * @throws {TypeError} if schema.sheets is not an array
 */
function compareSchemaToDrift(schema, config) {
  if (!schema || !Array.isArray(schema.sheets)) {
    throw new TypeError('schema.sheets must be an array of sheet descriptors');
  }

  const breaking = [];
  const silent_miss = [];
  const cosmetic = [];

  const metricCategoryKeys = new Set(Object.keys(config.metric_categories));
  // De-duplicated: a column listed twice in the config must not be counted
  // twice per sheet when deciding whether it is missing everywhere.
  const coreCols = new Set(config.core_cols);
  const skipSheets = new Set(config.skip_sheets);

  // Build lookup of xlsx sheet names and find the Summary sheet
  const xlsxSheetNames = new Set();
  let summarySheet = null;
  for (const sheet of schema.sheets) {
    xlsxSheetNames.add(sheet.name);
    if (sheet.name === SUMMARY_SHEET_NAME) {
      summarySheet = sheet;
    }
  }

  // Identify detail sheets: present in xlsx AND not in skip_sheets
  const detailSheets = schema.sheets.filter((s) => !skipSheets.has(s.name));

  // Metric values from the Summary sheet (used by multiple rules)
  const summaryMetricValues =
    summarySheet && Array.isArray(summarySheet.metric_values) ? summarySheet.metric_values : [];
  const summaryMetrics = new Set(summaryMetricValues);

  // --- Breaking rules ---

  // Missing core column: collect per-column stats first, then classify. A
  // column missing from ALL detail sheets is breaking; one missing from only
  // some (e.g. 5.8.1 uses CMDB columns) is cosmetic — the parser handles it
  // via extra_json.
  // A Map (not a plain object) so column names such as "constructor" cannot
  // collide with inherited Object.prototype members.
  const missingSheetsByCol = new Map(); // col -> [sheet names missing it]
  for (const sheet of detailSheets) {
    const sheetCols = new Set(sheet.columns || []);
    for (const coreCol of coreCols) {
      if (sheetCols.has(coreCol)) continue;
      if (!missingSheetsByCol.has(coreCol)) missingSheetsByCol.set(coreCol, []);
      missingSheetsByCol.get(coreCol).push(sheet.name);
    }
  }

  for (const [coreCol, missingSheets] of missingSheetsByCol) {
    if (detailSheets.length > 0 && missingSheets.length >= detailSheets.length) {
      // Missing from ALL detail sheets — genuinely breaking
      breaking.push(makeFinding(
        'breaking',
        `${coreColumnMissingEverywherePrefix(coreCol)}${detailSheets.length} detail sheet(s)`,
        coreCol,
        null
      ));
    } else {
      // Missing from some sheets — structural difference, not drift
      cosmetic.push(makeFinding(
        'cosmetic',
        `Core column "${coreCol}" is missing from ${missingSheets.length} of ${detailSheets.length} detail sheet(s): ${missingSheets.join(', ')}`,
        coreCol,
        null
      ));
    }
  }

  // Missing detail sheet: a metric_categories key (not in skip_sheets) is
  // absent from the xlsx. If the metric still appears in the Summary's
  // metric_values it is tracked but has zero violations this week —
  // downgrade to cosmetic instead of breaking.
  for (const metricKey of metricCategoryKeys) {
    if (skipSheets.has(metricKey) || xlsxSheetNames.has(metricKey)) continue;
    if (summaryMetrics.has(metricKey)) {
      cosmetic.push(makeFinding(
        'cosmetic',
        `Metric "${metricKey}" has no detail sheet this week — still tracked in Summary (zero violations)`,
        metricKey,
        null
      ));
    } else {
      breaking.push(makeFinding(
        'breaking',
        `Expected detail sheet "${metricKey}" (metric category) ${MISSING_SHEET_MARKER}`,
        metricKey,
        null
      ));
    }
  }

  // --- Silent-miss rules ---

  // Unknown metric value: a metric value in Summary is not a key in metric_categories
  for (const metricVal of summaryMetricValues) {
    if (!metricCategoryKeys.has(metricVal)) {
      silent_miss.push(makeFinding(
        'silent_miss',
        `Unknown metric "${metricVal}" in Summary — ${UNKNOWN_METRIC_MARKER}`,
        metricVal,
        SUMMARY_SHEET_NAME
      ));
    }
  }

  // Unknown sheet: an xlsx sheet not in skip_sheets and not in metric_categories
  for (const sheet of detailSheets) {
    if (!metricCategoryKeys.has(sheet.name)) {
      silent_miss.push(makeFinding(
        'silent_miss',
        `Unknown sheet "${sheet.name}" — not in skip_sheets or metric_categories`,
        sheet.name,
        sheet.name
      ));
    }
  }

  // --- Cosmetic rules ---

  // New column in detail sheet: a detail sheet has columns not in core_cols
  for (const sheet of detailSheets) {
    for (const col of (sheet.columns || [])) {
      if (!coreCols.has(col)) {
        cosmetic.push(makeFinding(
          'cosmetic',
          `New column "${col}" in sheet "${sheet.name}" — will be captured in extra_json`,
          col,
          sheet.name
        ));
      }
    }
  }

  // Stale metric category: a key in metric_categories not in Summary metric values
  for (const metricKey of metricCategoryKeys) {
    if (!summaryMetrics.has(metricKey)) {
      cosmetic.push(makeFinding(
        'cosmetic',
        `Stale metric category "${metricKey}" — not found in Summary sheet metric values`,
        metricKey,
        null
      ));
    }
  }

  return { breaking, silent_miss, cosmetic };
}

/**
 * Reconcile the parser config to resolve breaking drift findings.
 *
 * Breaking — "missing detail sheet":
 *   A metric_categories key has no matching xlsx sheet. But if the metric
 *   still appears in the Summary sheet's metric_values, it's a legitimate
 *   tracked metric that simply doesn't have violations this week — keep it.
 *   Only remove metrics absent from BOTH the xlsx sheets AND the Summary.
 *
 * Breaking — "missing core column":
 *   A core_cols entry is reported missing. It is removed only if it is missing
 *   from ALL detail sheets (some sheets like 5.8.1 have a completely different
 *   column structure and shouldn't cause removal). When a schema is supplied
 *   the claim is re-checked against the schema's detail sheets; without a
 *   schema, the aggregated "missing from all" finding is trusted, while the
 *   older per-sheet wording is kept because the sheet total is unknown.
 *
 * Silent-miss — "unknown metric":
 *   A metric value in the Summary is not in metric_categories. Add it as 'Other'.
 *
 * Silent-miss — "unknown sheet":
 *   Left as a warning. Auto-adding unknown sheets creates a reconcile loop.
 *
 * The config file is rewritten only when at least one change is not 'kept'.
 *
 * @param {string} configPath — path to compliance_config.json
 * @param {object} driftReport — the drift report from compareSchemaToDrift()
 * @param {object} [schema] — optional xlsx schema (with sheets[].name and Summary metric_values)
 * @returns {{ changes: Array<{ action: string, key: string, value: string, detail: string }>, config: object }}
 * @throws {Error} if the config file cannot be loaded (see loadConfig)
 */
function reconcileConfig(configPath, driftReport, schema) {
  const config = loadConfig(configPath);
  const changes = [];
  const report = driftReport || {};
  const breakingFindings = report.breaking || [];
  const silentMissFindings = report.silent_miss || [];

  // null when no usable schema was supplied
  const sheets = schema && Array.isArray(schema.sheets) ? schema.sheets : null;

  // Metric values listed on the Summary sheet (empty without a schema)
  const summaryMetrics = new Set();
  if (sheets) {
    const summarySheet = sheets.find((s) => s.name === SUMMARY_SHEET_NAME);
    if (summarySheet && Array.isArray(summarySheet.metric_values)) {
      for (const metric of summarySheet.metric_values) summaryMetrics.add(metric);
    }
  }

  // Detail sheets present in the xlsx (everything not in skip_sheets)
  const skipSheets = new Set(config.skip_sheets);
  const detailSheets = sheets ? sheets.filter((s) => !skipSheets.has(s.name)) : [];

  // --- Resolve breaking findings ---
  for (const finding of breakingFindings) {
    const message = typeof finding.message === 'string' ? finding.message : '';
    const value = finding.value;

    // Missing detail sheet: remove from metric_categories ONLY if the metric
    // is also absent from the Summary's metric_values.
    if (message.includes(MISSING_SHEET_MARKER) && hasOwn(config.metric_categories, value)) {
      if (summaryMetrics.has(value)) {
        changes.push({
          action: 'kept',
          key: 'metric_categories',
          value,
          detail: `Kept metric "${value}" — no detail sheet this week but still tracked in Summary`
        });
      } else {
        const oldCategory = config.metric_categories[value];
        delete config.metric_categories[value];
        changes.push({
          action: 'removed',
          key: 'metric_categories',
          value,
          detail: `Removed stale metric category "${value}" (was "${oldCategory}") — absent from both workbook sheets and Summary`
        });
      }
    }

    // Missing core column
    const isAggregated = message.startsWith(coreColumnMissingEverywherePrefix(value));
    const isLegacy = message.includes(LEGACY_MISSING_CORE_COL_MARKER);
    if (!isAggregated && !isLegacy) continue;
    if (!config.core_cols.includes(value)) continue;
    // One decision per column, even if several findings mention it
    if (changes.some((c) => c.key === 'core_cols' && c.value === value)) continue;

    let removable;
    let detail;
    if (sheets) {
      // Re-check against the schema rather than trusting the report alone
      const missingFromCount = detailSheets.filter(
        (s) => !(s.columns || []).includes(value)
      ).length;
      removable = detailSheets.length > 0 && missingFromCount >= detailSheets.length;
      detail = removable
        ? `Removed core column "${value}" — missing from all ${detailSheets.length} detail sheet(s)`
        : `Kept core column "${value}" — missing from ${missingFromCount} of ${detailSheets.length} detail sheet(s)`;
    } else if (isAggregated) {
      // The aggregated finding is only emitted when every detail sheet lacks the column
      removable = true;
      detail = `Removed core column "${value}" — drift report lists it as missing from all detail sheets`;
    } else {
      // Per-sheet wording without a schema: the total is unknown, so keep it
      const missingFromCount = breakingFindings.filter(
        (f) => typeof f.message === 'string' &&
          f.message.includes(LEGACY_MISSING_CORE_COL_MARKER) &&
          f.value === value
      ).length;
      removable = false;
      detail = `Kept core column "${value}" — missing from ${missingFromCount} of 0 detail sheet(s)`;
    }

    if (removable) {
      config.core_cols = config.core_cols.filter((c) => c !== value);
    }
    changes.push({
      action: removable ? 'removed' : 'kept',
      key: 'core_cols',
      value,
      detail
    });
  }

  // --- Resolve silent-miss findings ---
  for (const finding of silentMissFindings) {
    const message = typeof finding.message === 'string' ? finding.message : '';
    // Unknown metric in Summary: add to metric_categories as 'Other'.
    // Own-property check so a metric named like an inherited member
    // (e.g. "toString") is not mistaken for an existing category.
    if (message.includes(UNKNOWN_METRIC_MARKER) && !hasOwn(config.metric_categories, finding.value)) {
      config.metric_categories[finding.value] = 'Other';
      changes.push({
        action: 'added',
        key: 'metric_categories',
        value: finding.value,
        detail: `Added new metric "${finding.value}" to metric_categories as "Other"`
      });
    }
    // Unknown sheet: left as a warning — auto-adding creates a reconcile loop.
  }

  // Only write if there were actual config mutations (not just 'kept' entries)
  const hasMutations = changes.some((c) => c.action !== 'kept');
  if (hasMutations) {
    fs.writeFileSync(configPath, JSON.stringify(config, null, 2) + '\n', 'utf8');
  }

  return { changes, config };
}

module.exports = { compareSchemaToDrift, loadConfig, reconcileConfig };