Fix duplicate failing metrics on same asset across compliance endpoints

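Deduplicate (hostname, metric_id) rows across verticals using DISTINCT ON in
GET /items, GET /items/:hostname, GET /vcl/stats (heavy-hitters + forecast),
and GET /mttr. In the persistUpload() snapshot block, add a hostname_status
CTE that classifies each hostname exactly once (active wins over resolved).
In groupByHostname(), add a defensive per-device Set of seen metric IDs so a
metric is never listed twice for the same device.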

Includes 38 property-based tests (11 exploration + 27 preservation) covering
all six affected sites.

Closes #13
This commit is contained in:
Jordan Ramos
2026-05-18 15:57:10 -06:00
parent da5505bd27
commit 520f50fbbf
3 changed files with 2242 additions and 28 deletions

View File

@@ -160,18 +160,24 @@ async function persistUpload({ items, summary, reportDate, filename, userId, ver
const currentMonth = new Date().toISOString().slice(0, 7); // YYYY-MM
// Compute compliance percentages for the snapshotted vertical only.
// `IS NOT DISTINCT FROM` matches the legacy `vertical IS NULL` case
// when the upload is an AEO-only upload (vertical = null), so the
// single-vertical-month preservation path keeps its previous
// semantics.
// CTE classifies each hostname by its worst-case status (active wins
// over resolved via MIN) so a hostname with both active and resolved
// rows across verticals is counted in exactly one column.
const { rows: verticalStats } = await pool.query(
`SELECT vertical, team,
COUNT(DISTINCT hostname)::int AS total_devices,
COUNT(DISTINCT CASE WHEN status = 'resolved' THEN hostname END)::int AS compliant,
COUNT(DISTINCT CASE WHEN status = 'active' THEN hostname END)::int AS non_compliant
`WITH hostname_status AS (
SELECT team,
hostname,
MIN(status) AS status
FROM compliance_items
WHERE team IS NOT NULL AND vertical IS NOT DISTINCT FROM $1
GROUP BY vertical, team`,
GROUP BY team, hostname
)
SELECT team AS vertical,
COUNT(*)::int AS total_devices,
COUNT(*) FILTER (WHERE status = 'resolved')::int AS compliant,
COUNT(*) FILTER (WHERE status = 'active')::int AS non_compliant
FROM hostname_status
GROUP BY team`,
[vertical]
);
@@ -217,18 +223,22 @@ function groupByHostname(rows, noteHostnames) {
deviceMap[row.hostname] = {
hostname: row.hostname, ip_address: row.ip_address || '', device_type: row.device_type || '',
team: row.team || '', status: row.status, failing_metrics: [],
_seenMetricIds: new Set(),
seen_count: row.seen_count || 1, first_seen: row.first_seen || null,
last_seen: row.last_seen || null, resolved_on: row.resolved_on || null,
has_notes: noteHostnames.has(row.hostname),
};
}
const dev = deviceMap[row.hostname];
if (!dev._seenMetricIds.has(row.metric_id)) {
dev._seenMetricIds.add(row.metric_id);
dev.failing_metrics.push({ metric_id: row.metric_id, metric_desc: row.metric_desc || '', category: row.category || '' });
}
if ((row.seen_count || 1) > dev.seen_count) dev.seen_count = row.seen_count;
if (row.first_seen && (!dev.first_seen || row.first_seen < dev.first_seen)) dev.first_seen = row.first_seen;
if (row.last_seen && (!dev.last_seen || row.last_seen > dev.last_seen)) dev.last_seen = row.last_seen;
}
return Object.values(deviceMap);
return Object.values(deviceMap).map(({ _seenMetricIds, ...dev }) => dev);
}
// ---------------------------------------------------------------------------
@@ -584,15 +594,17 @@ function createComplianceRouter(upload) {
try {
// Include items from both AEO uploads (vertical IS NULL) and NTS_AEO multi-vertical uploads
// DISTINCT ON deduplicates cross-vertical (hostname, metric_id) pairs, keeping the representative row
const { rows } = await pool.query(
`SELECT ci.hostname, ci.ip_address, ci.device_type, ci.team, ci.metric_id, ci.metric_desc, ci.category, ci.status, ci.seen_count,
`SELECT DISTINCT ON (ci.hostname, ci.metric_id)
ci.hostname, ci.ip_address, ci.device_type, ci.team, ci.metric_id, ci.metric_desc, ci.category, ci.status, ci.seen_count,
fu.report_date AS first_seen, lu.report_date AS last_seen, ru.report_date AS resolved_on
FROM compliance_items ci
LEFT JOIN compliance_uploads fu ON ci.first_seen_upload_id = fu.id
LEFT JOIN compliance_uploads lu ON ci.upload_id = lu.id
LEFT JOIN compliance_uploads ru ON ci.resolved_upload_id = ru.id
WHERE ci.team = $1 AND ci.status = $2 AND (ci.vertical IS NULL OR ci.vertical = 'NTS_AEO')
ORDER BY ci.hostname, ci.metric_id`,
ORDER BY ci.hostname, ci.metric_id, ci.seen_count DESC, ci.upload_id DESC`,
[team, status]
);
@@ -622,7 +634,8 @@ function createComplianceRouter(upload) {
try {
const { rows: metricRows } = await pool.query(
`SELECT ci.metric_id, ci.metric_desc, ci.category, ci.status, ci.ip_address, ci.device_type, ci.team, ci.seen_count, ci.extra_json,
`SELECT DISTINCT ON (ci.metric_id, ci.status)
ci.metric_id, ci.metric_desc, ci.category, ci.status, ci.ip_address, ci.device_type, ci.team, ci.seen_count, ci.extra_json,
ci.resolution_date, ci.remediation_plan,
fu.report_date AS first_seen, fu.uploaded_at AS first_seen_at, lu.report_date AS last_seen, lu.uploaded_at AS last_seen_at, ru.report_date AS resolved_on
FROM compliance_items ci
@@ -630,10 +643,16 @@ function createComplianceRouter(upload) {
LEFT JOIN compliance_uploads lu ON ci.upload_id = lu.id
LEFT JOIN compliance_uploads ru ON ci.resolved_upload_id = ru.id
WHERE ci.hostname = $1
ORDER BY ci.status DESC, ci.metric_id`, [hostname]
ORDER BY ci.metric_id, ci.status, ci.seen_count DESC, ci.upload_id DESC`, [hostname]
);
if (metricRows.length === 0) return res.status(404).json({ error: 'Device not found' });
// Reproduce original ORDER BY ci.status DESC, ci.metric_id on the deduped rows
metricRows.sort((a, b) => {
if (a.status !== b.status) return b.status.localeCompare(a.status);
return a.metric_id.localeCompare(b.metric_id);
});
const metrics = metricRows.map(r => ({ ...r, extra: (() => { try { return JSON.parse(r.extra_json || '{}'); } catch { return {}; } })(), extra_json: undefined }));
const { rows: notes } = await pool.query(
@@ -854,7 +873,13 @@ function createComplianceRouter(upload) {
*/
router.get('/mttr', async (req, res) => {
try {
const { rows } = await pool.query(`SELECT COALESCE(seen_count, 1) AS seen_count, team FROM compliance_items WHERE status = 'active'`);
const { rows } = await pool.query(
`SELECT DISTINCT ON (hostname, metric_id)
COALESCE(seen_count, 1) AS seen_count, team
FROM compliance_items
WHERE status = 'active'
ORDER BY hostname, metric_id, seen_count DESC, upload_id DESC`
);
if (rows.length === 0) return res.json({ aging: [] });
const aging = bucketAgingItems(rows);
res.json({ aging });
@@ -1105,13 +1130,21 @@ function createComplianceRouter(upload) {
const donut = categorizeNonCompliant(donutRows);
// Heavy hitters: group by team, count non-compliant DEVICES per team
// CTE deduplicates hostnames to one team via representative row (highest seen_count, most recent upload_id)
const { rows: teamRows } = await pool.query(`
SELECT
WITH device_team AS (
SELECT DISTINCT ON (hostname)
hostname,
COALESCE(team, 'Unknown') AS team,
COUNT(DISTINCT hostname) AS non_compliant,
MAX(resolution_date) AS compliance_date
resolution_date
FROM compliance_items
WHERE status = 'active'
ORDER BY hostname, seen_count DESC, upload_id DESC
)
SELECT team,
COUNT(DISTINCT hostname)::int AS non_compliant,
MAX(resolution_date) AS compliance_date
FROM device_team
GROUP BY team
ORDER BY COUNT(DISTINCT hostname) DESC
`);
@@ -1129,18 +1162,28 @@ function createComplianceRouter(upload) {
const team = teamRow.team;
const teamNonCompliant = parseInt(teamRow.non_compliant);
// Get total devices for this team (all statuses)
// Get total devices for this team (all statuses) — CTE deduplicates hostnames to one team
const { rows: teamTotalRows } = await pool.query(
`SELECT COUNT(DISTINCT hostname) AS total FROM compliance_items WHERE COALESCE(team, 'Unknown') = $1`,
`WITH device_team AS (
SELECT DISTINCT ON (hostname)
hostname,
COALESCE(team, 'Unknown') AS team
FROM compliance_items
ORDER BY hostname, seen_count DESC, upload_id DESC
)
SELECT COUNT(*)::int AS total FROM device_team WHERE team = $1`,
[team]
);
const teamTotal = parseInt(teamTotalRows[0]?.total) || 0;
const teamCompliant = teamTotal - teamNonCompliant;
const compliance_pct_team = teamTotal > 0 ? Math.round((teamCompliant / teamTotal) * 100) : 0;
// Forecast burndown from resolution_dates
// Forecast burndown from resolution_dates — DISTINCT ON deduplicates cross-vertical (hostname, metric_id) pairs
const { rows: forecastItems } = await pool.query(
`SELECT resolution_date FROM compliance_items WHERE status = 'active' AND COALESCE(team, 'Unknown') = $1 AND resolution_date IS NOT NULL`,
`SELECT DISTINCT ON (hostname, metric_id) resolution_date
FROM compliance_items
WHERE status = 'active' AND COALESCE(team, 'Unknown') = $1 AND resolution_date IS NOT NULL
ORDER BY hostname, metric_id, seen_count DESC, upload_id DESC`,
[team]
);
const forecast_burndown = computeForecastBurndown(forecastItems);