Fix duplicate failing metrics on same asset across compliance endpoints

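Deduplicate (hostname, metric_id) rows across verticals using DISTINCT ON in
GET /items, GET /items/:hostname, GET /vcl/stats (heavy-hitters + forecast),
and GET /mttr. In the persistUpload() snapshot block, add a hostname_status
CTE that classifies each hostname by its worst-case status (active wins over
resolved). Also add a defensive per-device Set of already-seen metric IDs in
groupByHostname() so a metric is never pushed twice for the same hostname.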

Includes 38 property-based tests (11 exploration + 27 preservation) covering
all six affected sites.

Closes #13
This commit is contained in:
Jordan Ramos
2026-05-18 15:57:10 -06:00
parent da5505bd27
commit 520f50fbbf
3 changed files with 2242 additions and 28 deletions

View File

@@ -160,18 +160,24 @@ async function persistUpload({ items, summary, reportDate, filename, userId, ver
const currentMonth = new Date().toISOString().slice(0, 7); // YYYY-MM const currentMonth = new Date().toISOString().slice(0, 7); // YYYY-MM
// Compute compliance percentages for the snapshotted vertical only. // Compute compliance percentages for the snapshotted vertical only.
// `IS NOT DISTINCT FROM` matches the legacy `vertical IS NULL` case // CTE classifies each hostname by its worst-case status (active wins
// when the upload is an AEO-only upload (vertical = null), so the // over resolved via MIN) so a hostname with both active and resolved
// single-vertical-month preservation path keeps its previous // rows across verticals is counted in exactly one column.
// semantics.
const { rows: verticalStats } = await pool.query( const { rows: verticalStats } = await pool.query(
`SELECT vertical, team, `WITH hostname_status AS (
COUNT(DISTINCT hostname)::int AS total_devices, SELECT team,
COUNT(DISTINCT CASE WHEN status = 'resolved' THEN hostname END)::int AS compliant, hostname,
COUNT(DISTINCT CASE WHEN status = 'active' THEN hostname END)::int AS non_compliant MIN(status) AS status
FROM compliance_items FROM compliance_items
WHERE team IS NOT NULL AND vertical IS NOT DISTINCT FROM $1 WHERE team IS NOT NULL AND vertical IS NOT DISTINCT FROM $1
GROUP BY vertical, team`, GROUP BY team, hostname
)
SELECT team AS vertical,
COUNT(*)::int AS total_devices,
COUNT(*) FILTER (WHERE status = 'resolved')::int AS compliant,
COUNT(*) FILTER (WHERE status = 'active')::int AS non_compliant
FROM hostname_status
GROUP BY team`,
[vertical] [vertical]
); );
@@ -217,18 +223,22 @@ function groupByHostname(rows, noteHostnames) {
deviceMap[row.hostname] = { deviceMap[row.hostname] = {
hostname: row.hostname, ip_address: row.ip_address || '', device_type: row.device_type || '', hostname: row.hostname, ip_address: row.ip_address || '', device_type: row.device_type || '',
team: row.team || '', status: row.status, failing_metrics: [], team: row.team || '', status: row.status, failing_metrics: [],
_seenMetricIds: new Set(),
seen_count: row.seen_count || 1, first_seen: row.first_seen || null, seen_count: row.seen_count || 1, first_seen: row.first_seen || null,
last_seen: row.last_seen || null, resolved_on: row.resolved_on || null, last_seen: row.last_seen || null, resolved_on: row.resolved_on || null,
has_notes: noteHostnames.has(row.hostname), has_notes: noteHostnames.has(row.hostname),
}; };
} }
const dev = deviceMap[row.hostname]; const dev = deviceMap[row.hostname];
if (!dev._seenMetricIds.has(row.metric_id)) {
dev._seenMetricIds.add(row.metric_id);
dev.failing_metrics.push({ metric_id: row.metric_id, metric_desc: row.metric_desc || '', category: row.category || '' }); dev.failing_metrics.push({ metric_id: row.metric_id, metric_desc: row.metric_desc || '', category: row.category || '' });
}
if ((row.seen_count || 1) > dev.seen_count) dev.seen_count = row.seen_count; if ((row.seen_count || 1) > dev.seen_count) dev.seen_count = row.seen_count;
if (row.first_seen && (!dev.first_seen || row.first_seen < dev.first_seen)) dev.first_seen = row.first_seen; if (row.first_seen && (!dev.first_seen || row.first_seen < dev.first_seen)) dev.first_seen = row.first_seen;
if (row.last_seen && (!dev.last_seen || row.last_seen > dev.last_seen)) dev.last_seen = row.last_seen; if (row.last_seen && (!dev.last_seen || row.last_seen > dev.last_seen)) dev.last_seen = row.last_seen;
} }
return Object.values(deviceMap); return Object.values(deviceMap).map(({ _seenMetricIds, ...dev }) => dev);
} }
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
@@ -584,15 +594,17 @@ function createComplianceRouter(upload) {
try { try {
// Include items from both AEO uploads (vertical IS NULL) and NTS_AEO multi-vertical uploads // Include items from both AEO uploads (vertical IS NULL) and NTS_AEO multi-vertical uploads
// DISTINCT ON deduplicates cross-vertical (hostname, metric_id) pairs, keeping the first row per pair under ORDER BY seen_count DESC, upload_id DESC
const { rows } = await pool.query( const { rows } = await pool.query(
`SELECT ci.hostname, ci.ip_address, ci.device_type, ci.team, ci.metric_id, ci.metric_desc, ci.category, ci.status, ci.seen_count, `SELECT DISTINCT ON (ci.hostname, ci.metric_id)
ci.hostname, ci.ip_address, ci.device_type, ci.team, ci.metric_id, ci.metric_desc, ci.category, ci.status, ci.seen_count,
fu.report_date AS first_seen, lu.report_date AS last_seen, ru.report_date AS resolved_on fu.report_date AS first_seen, lu.report_date AS last_seen, ru.report_date AS resolved_on
FROM compliance_items ci FROM compliance_items ci
LEFT JOIN compliance_uploads fu ON ci.first_seen_upload_id = fu.id LEFT JOIN compliance_uploads fu ON ci.first_seen_upload_id = fu.id
LEFT JOIN compliance_uploads lu ON ci.upload_id = lu.id LEFT JOIN compliance_uploads lu ON ci.upload_id = lu.id
LEFT JOIN compliance_uploads ru ON ci.resolved_upload_id = ru.id LEFT JOIN compliance_uploads ru ON ci.resolved_upload_id = ru.id
WHERE ci.team = $1 AND ci.status = $2 AND (ci.vertical IS NULL OR ci.vertical = 'NTS_AEO') WHERE ci.team = $1 AND ci.status = $2 AND (ci.vertical IS NULL OR ci.vertical = 'NTS_AEO')
ORDER BY ci.hostname, ci.metric_id`, ORDER BY ci.hostname, ci.metric_id, ci.seen_count DESC, ci.upload_id DESC`,
[team, status] [team, status]
); );
@@ -622,7 +634,8 @@ function createComplianceRouter(upload) {
try { try {
const { rows: metricRows } = await pool.query( const { rows: metricRows } = await pool.query(
`SELECT ci.metric_id, ci.metric_desc, ci.category, ci.status, ci.ip_address, ci.device_type, ci.team, ci.seen_count, ci.extra_json, `SELECT DISTINCT ON (ci.metric_id, ci.status)
ci.metric_id, ci.metric_desc, ci.category, ci.status, ci.ip_address, ci.device_type, ci.team, ci.seen_count, ci.extra_json,
ci.resolution_date, ci.remediation_plan, ci.resolution_date, ci.remediation_plan,
fu.report_date AS first_seen, fu.uploaded_at AS first_seen_at, lu.report_date AS last_seen, lu.uploaded_at AS last_seen_at, ru.report_date AS resolved_on fu.report_date AS first_seen, fu.uploaded_at AS first_seen_at, lu.report_date AS last_seen, lu.uploaded_at AS last_seen_at, ru.report_date AS resolved_on
FROM compliance_items ci FROM compliance_items ci
@@ -630,10 +643,16 @@ function createComplianceRouter(upload) {
LEFT JOIN compliance_uploads lu ON ci.upload_id = lu.id LEFT JOIN compliance_uploads lu ON ci.upload_id = lu.id
LEFT JOIN compliance_uploads ru ON ci.resolved_upload_id = ru.id LEFT JOIN compliance_uploads ru ON ci.resolved_upload_id = ru.id
WHERE ci.hostname = $1 WHERE ci.hostname = $1
ORDER BY ci.status DESC, ci.metric_id`, [hostname] ORDER BY ci.metric_id, ci.status, ci.seen_count DESC, ci.upload_id DESC`, [hostname]
); );
if (metricRows.length === 0) return res.status(404).json({ error: 'Device not found' }); if (metricRows.length === 0) return res.status(404).json({ error: 'Device not found' });
// Reproduce original ORDER BY ci.status DESC, ci.metric_id on the deduped rows
metricRows.sort((a, b) => {
if (a.status !== b.status) return b.status.localeCompare(a.status);
return a.metric_id.localeCompare(b.metric_id);
});
const metrics = metricRows.map(r => ({ ...r, extra: (() => { try { return JSON.parse(r.extra_json || '{}'); } catch { return {}; } })(), extra_json: undefined })); const metrics = metricRows.map(r => ({ ...r, extra: (() => { try { return JSON.parse(r.extra_json || '{}'); } catch { return {}; } })(), extra_json: undefined }));
const { rows: notes } = await pool.query( const { rows: notes } = await pool.query(
@@ -854,7 +873,13 @@ function createComplianceRouter(upload) {
*/ */
router.get('/mttr', async (req, res) => { router.get('/mttr', async (req, res) => {
try { try {
const { rows } = await pool.query(`SELECT COALESCE(seen_count, 1) AS seen_count, team FROM compliance_items WHERE status = 'active'`); const { rows } = await pool.query(
`SELECT DISTINCT ON (hostname, metric_id)
COALESCE(seen_count, 1) AS seen_count, team
FROM compliance_items
WHERE status = 'active'
ORDER BY hostname, metric_id, seen_count DESC, upload_id DESC`
);
if (rows.length === 0) return res.json({ aging: [] }); if (rows.length === 0) return res.json({ aging: [] });
const aging = bucketAgingItems(rows); const aging = bucketAgingItems(rows);
res.json({ aging }); res.json({ aging });
@@ -1105,13 +1130,21 @@ function createComplianceRouter(upload) {
const donut = categorizeNonCompliant(donutRows); const donut = categorizeNonCompliant(donutRows);
// Heavy hitters: group by team, count non-compliant DEVICES per team // Heavy hitters: group by team, count non-compliant DEVICES per team
// CTE deduplicates hostnames to one team via representative row (highest seen_count, most recent upload_id)
const { rows: teamRows } = await pool.query(` const { rows: teamRows } = await pool.query(`
SELECT WITH device_team AS (
SELECT DISTINCT ON (hostname)
hostname,
COALESCE(team, 'Unknown') AS team, COALESCE(team, 'Unknown') AS team,
COUNT(DISTINCT hostname) AS non_compliant, resolution_date
MAX(resolution_date) AS compliance_date
FROM compliance_items FROM compliance_items
WHERE status = 'active' WHERE status = 'active'
ORDER BY hostname, seen_count DESC, upload_id DESC
)
SELECT team,
COUNT(DISTINCT hostname)::int AS non_compliant,
MAX(resolution_date) AS compliance_date
FROM device_team
GROUP BY team GROUP BY team
ORDER BY COUNT(DISTINCT hostname) DESC ORDER BY COUNT(DISTINCT hostname) DESC
`); `);
@@ -1129,18 +1162,28 @@ function createComplianceRouter(upload) {
const team = teamRow.team; const team = teamRow.team;
const teamNonCompliant = parseInt(teamRow.non_compliant); const teamNonCompliant = parseInt(teamRow.non_compliant);
// Get total devices for this team (all statuses) // Get total devices for this team (all statuses) — CTE deduplicates hostnames to one team
const { rows: teamTotalRows } = await pool.query( const { rows: teamTotalRows } = await pool.query(
`SELECT COUNT(DISTINCT hostname) AS total FROM compliance_items WHERE COALESCE(team, 'Unknown') = $1`, `WITH device_team AS (
SELECT DISTINCT ON (hostname)
hostname,
COALESCE(team, 'Unknown') AS team
FROM compliance_items
ORDER BY hostname, seen_count DESC, upload_id DESC
)
SELECT COUNT(*)::int AS total FROM device_team WHERE team = $1`,
[team] [team]
); );
const teamTotal = parseInt(teamTotalRows[0]?.total) || 0; const teamTotal = parseInt(teamTotalRows[0]?.total) || 0;
const teamCompliant = teamTotal - teamNonCompliant; const teamCompliant = teamTotal - teamNonCompliant;
const compliance_pct_team = teamTotal > 0 ? Math.round((teamCompliant / teamTotal) * 100) : 0; const compliance_pct_team = teamTotal > 0 ? Math.round((teamCompliant / teamTotal) * 100) : 0;
// Forecast burndown from resolution_dates // Forecast burndown from resolution_dates — DISTINCT ON deduplicates cross-vertical (hostname, metric_id) pairs
const { rows: forecastItems } = await pool.query( const { rows: forecastItems } = await pool.query(
`SELECT resolution_date FROM compliance_items WHERE status = 'active' AND COALESCE(team, 'Unknown') = $1 AND resolution_date IS NOT NULL`, `SELECT DISTINCT ON (hostname, metric_id) resolution_date
FROM compliance_items
WHERE status = 'active' AND COALESCE(team, 'Unknown') = $1 AND resolution_date IS NOT NULL
ORDER BY hostname, metric_id, seen_count DESC, upload_id DESC`,
[team] [team]
); );
const forecast_burndown = computeForecastBurndown(forecastItems); const forecast_burndown = computeForecastBurndown(forecastItems);