feat(monitoring): resolve Loki-stack syslog ingestion with rsyslog filter fix

Fixed critical issue preventing UniFi router logs from reaching Loki/Promtail/Grafana.

Root Cause:
- rsyslog filter in /etc/rsyslog.d/unifi-router.conf filtered for 192.168.1.1
- VM 101 is on VLAN 2, so the actual source IP is 192.168.2.1 (the VLAN 2 gateway)
- Filter silently rejected all incoming syslog traffic

Solution:
- Updated rsyslog filter from 192.168.1.1 to 192.168.2.1
- Logs now flow: UniFi → rsyslog → Promtail → Loki → Grafana

Changes:
- Add services/loki-stack/* - Complete Loki/Promtail/Grafana stack configs
- Add services/logward/* - Logward service configuration
- Update troubleshooting/loki-stack-bugfix.md - Complete 5-phase resolution
- Update CLAUDE_STATUS.md - Document 2025-12-11 resolution
- Update sub-agents/scribe.md - Agent improvements
- Remove services/promtail-config.yml - Duplicate file cleanup

Status: Monitoring stack fully operational, syslog ingestion active

Technical Details: See troubleshooting/loki-stack-bugfix.md for complete analysis

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-11 13:56:27 -07:00
parent 698a5b531a
commit 892684c46e
8 changed files with 526 additions and 3 deletions

View File

@@ -0,0 +1,62 @@
# LogWard environment configuration (template).
#
# SECURITY: this file is tracked in version control, so it must only contain
# placeholders. Any real credential that was ever committed here has to be
# treated as compromised and rotated. Generate replacements with e.g.
#   openssl rand -hex 32
# Hex output contains no characters that need escaping in the URLs below or
# in shell commands.

# Database
# DATABASE_URL must embed the same credentials as DB_USER / DB_PASSWORD.
DATABASE_URL=postgresql://logward:change_me_db_password@localhost:5432/logward
DB_NAME=logward
DB_USER=logward
DB_PASSWORD=change_me_db_password

# Redis
# REDIS_URL must embed the same password as REDIS_PASSWORD.
REDIS_PASSWORD=change_me_redis_password
REDIS_URL=redis://:change_me_redis_password@localhost:6379

# API
API_KEY_SECRET=change_me_generate_with_openssl_rand_hex_32
PORT=8080
HOST=0.0.0.0

# SMTP (configure for email alerts)
SMTP_HOST=smtp.example.com
SMTP_PORT=587
SMTP_USER=your_email@example.com
SMTP_PASS=your_smtp_password
SMTP_FROM=noreply@logward.local

# Rate Limiting
RATE_LIMIT_MAX=1000
RATE_LIMIT_WINDOW=60000

# Environment
NODE_ENV=development

# Internal Logging (Self-Monitoring)
# Enable/disable internal logging (logs LogWard's own requests/errors)
INTERNAL_LOGGING_ENABLED=true
# API key for internal logging project (auto-generated on first run if not set)
# After first run, copy the generated key from console output and set it here
# INTERNAL_API_KEY=lp_your_generated_api_key_here
# API URL for internal logging (defaults to API_URL if not set)
# INTERNAL_LOGGING_API_URL=http://localhost:8080
# Service name (distinguishes backend from worker in logs)
# Backend: logward-backend (default)
# Worker: logward-worker
SERVICE_NAME=logward-backend

# Frontend (SvelteKit)
# Public API URL for frontend to connect to backend
PUBLIC_API_URL=http://localhost:8080

# Fluent Bit (optional)
# Passed to the fluent-bit container as LOGWARD_API_KEY by docker-compose.
# FLUENT_BIT_API_KEY=lp_your_fluent_bit_api_key_here

# GitHub API Token (optional - for SigmaHQ integration)
# Without token: 60 requests/hour rate limit
# With token: 5000 requests/hour rate limit
# Create token at: https://github.com/settings/tokens (no scopes needed for public repos)
# GITHUB_TOKEN=ghp_your_github_personal_access_token_here

# Docker Images (optional - specify custom images or versions)
# By default, uses latest from Docker Hub
# Available registries:
# - Docker Hub: logward/backend:latest, logward/frontend:latest
# - GHCR: ghcr.io/logward-dev/logward-backend:latest, ghcr.io/logward-dev/logward-frontend:latest
# LOGWARD_BACKEND_IMAGE=logward/backend:0.2.4
# LOGWARD_FRONTEND_IMAGE=logward/frontend:0.2.4

View File

@@ -0,0 +1,174 @@
# LogWard stack: TimescaleDB, Redis, backend API, background worker, frontend
# and a Fluent Bit log shipper. Every ${...} value is interpolated by Compose
# from the shell environment or the adjacent .env file.
version: '3.8'

services:
  postgres:
    image: timescale/timescaledb:latest-pg16
    container_name: logward-postgres
    environment:
      POSTGRES_DB: ${DB_NAME}
      POSTGRES_USER: ${DB_USER}
      POSTGRES_PASSWORD: ${DB_PASSWORD}
    # NOTE(review): publishes the database on every host interface; drop this
    # mapping if only the containers on logward-network need to reach it.
    ports:
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
    command:
      - "postgres"
      - "-c"
      - "max_connections=100"
      - "-c"
      - "shared_buffers=256MB"
      - "-c"
      - "effective_cache_size=768MB"
      - "-c"
      - "work_mem=16MB"
      - "-c"
      - "maintenance_work_mem=128MB"
      # Parallel query settings for faster aggregations
      - "-c"
      - "max_parallel_workers_per_gather=4"
      - "-c"
      - "max_parallel_workers=8"
      - "-c"
      - "parallel_tuple_cost=0.01"
      - "-c"
      - "parallel_setup_cost=100"
      - "-c"
      - "min_parallel_table_scan_size=8MB"
      # Write-ahead log tuning for ingestion
      - "-c"
      - "wal_buffers=16MB"
      - "-c"
      - "checkpoint_completion_target=0.9"
      # Logging for slow queries (>100ms)
      - "-c"
      - "log_min_duration_statement=100"
    healthcheck:
      # Probe the application database explicitly; without -d, pg_isready
      # targets a database named after the user.
      test: ["CMD-SHELL", "pg_isready -U ${DB_USER} -d ${DB_NAME}"]
      interval: 10s
      timeout: 5s
      retries: 5
    restart: unless-stopped
    networks:
      - logward-network

  redis:
    image: redis:7-alpine
    container_name: logward-redis
    # Exec form so the password is passed as a single argument and is never
    # word-split.
    command:
      - "redis-server"
      - "--requirepass"
      - "${REDIS_PASSWORD}"
    environment:
      # redis-cli reads its password from REDISCLI_AUTH. The healthcheck runs
      # inside the container, so the password has to exist in the container
      # environment; it also stays off the redis-cli command line this way.
      REDISCLI_AUTH: ${REDIS_PASSWORD}
    # NOTE(review): publishes Redis on every host interface; drop this mapping
    # if only the containers on logward-network need to reach it.
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    healthcheck:
      test: ["CMD-SHELL", "redis-cli ping | grep -q PONG"]
      interval: 10s
      timeout: 3s
      retries: 5
    restart: unless-stopped
    networks:
      - logward-network

  backend:
    image: ${LOGWARD_BACKEND_IMAGE:-logward/backend:latest}
    container_name: logward-backend
    ports:
      - "8080:8080"
    environment:
      NODE_ENV: production
      DATABASE_URL: postgresql://${DB_USER}:${DB_PASSWORD}@postgres:5432/${DB_NAME}
      DATABASE_HOST: postgres
      DB_USER: ${DB_USER}
      REDIS_URL: redis://:${REDIS_PASSWORD}@redis:6379
      API_KEY_SECRET: ${API_KEY_SECRET}
      PORT: "8080"
      HOST: "0.0.0.0"
      SMTP_HOST: ${SMTP_HOST:-}
      SMTP_PORT: ${SMTP_PORT:-587}
      SMTP_USER: ${SMTP_USER:-}
      SMTP_PASS: ${SMTP_PASS:-}
      SMTP_FROM: ${SMTP_FROM:-noreply@logward.local}
      INTERNAL_LOGGING_ENABLED: ${INTERNAL_LOGGING_ENABLED:-false}
      INTERNAL_API_KEY: ${INTERNAL_API_KEY:-}
      SERVICE_NAME: logward-backend
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    restart: unless-stopped
    networks:
      - logward-network

  worker:
    # Same image as the backend, started in worker mode.
    image: ${LOGWARD_BACKEND_IMAGE:-logward/backend:latest}
    container_name: logward-worker
    command: ["worker"]
    healthcheck:
      disable: true
    environment:
      NODE_ENV: production
      DATABASE_URL: postgresql://${DB_USER}:${DB_PASSWORD}@postgres:5432/${DB_NAME}
      DATABASE_HOST: postgres
      DB_USER: ${DB_USER}
      REDIS_URL: redis://:${REDIS_PASSWORD}@redis:6379
      API_KEY_SECRET: ${API_KEY_SECRET}
      SMTP_HOST: ${SMTP_HOST:-}
      SMTP_PORT: ${SMTP_PORT:-587}
      SMTP_USER: ${SMTP_USER:-}
      SMTP_PASS: ${SMTP_PASS:-}
      SMTP_FROM: ${SMTP_FROM:-noreply@logward.local}
      INTERNAL_LOGGING_ENABLED: ${INTERNAL_LOGGING_ENABLED:-false}
      INTERNAL_API_KEY: ${INTERNAL_API_KEY:-}
      SERVICE_NAME: logward-worker
    depends_on:
      # NOTE(review): no healthcheck is declared for `backend` in this file,
      # so this condition presumably relies on a HEALTHCHECK baked into the
      # backend image - TODO confirm.
      backend:
        condition: service_healthy
      redis:
        condition: service_healthy
    restart: unless-stopped
    networks:
      - logward-network

  frontend:
    image: ${LOGWARD_FRONTEND_IMAGE:-logward/frontend:latest}
    container_name: logward-frontend
    ports:
      - "3001:3001"
    environment:
      NODE_ENV: production
      PUBLIC_API_URL: ${PUBLIC_API_URL:-http://localhost:8080}
    depends_on:
      - backend
    restart: unless-stopped
    networks:
      - logward-network

  fluent-bit:
    image: fluent/fluent-bit:latest
    container_name: logward-fluent-bit
    volumes:
      - ./fluent-bit.conf:/fluent-bit/etc/fluent-bit.conf:ro
      - ./parsers.conf:/fluent-bit/etc/parsers.conf:ro
      - ./extract_container_id.lua:/fluent-bit/etc/extract_container_id.lua:ro
      - ./wrap_logs.lua:/fluent-bit/etc/wrap_logs.lua:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
    environment:
      LOGWARD_API_KEY: ${FLUENT_BIT_API_KEY:-}
      LOGWARD_API_HOST: backend
    depends_on:
      - backend
    restart: unless-stopped
    networks:
      - logward-network

volumes:
  postgres_data:
    driver: local
  redis_data:
    driver: local

networks:
  logward-network:
    driver: bridge

View File

@@ -0,0 +1,33 @@
# Loki + Promtail log pipeline. Both containers attach to the external
# `monitoring-net` network, which must already exist before this stack starts.
version: '3.8'

services:
  loki:
    image: grafana/loki:latest
    container_name: loki
    ports:
      - "3100:3100"
    volumes:
      # Configuration is only read by the container, so mount it read-only.
      - /home/server-admin/loki-stack/loki-config.yaml:/etc/loki/local-config.yaml:ro
      # The Loki config keeps chunks, rules and compactor state under /loki.
      # Without a volume that data lives in the container's writable layer and
      # is lost whenever the container is recreated.
      - loki_data:/loki
    command: -config.file=/etc/loki/local-config.yaml
    networks:
      - monitoring-net
    restart: unless-stopped

  promtail:
    image: grafana/promtail:latest
    container_name: promtail
    volumes:
      - /home/server-admin/loki-stack/promtail-config.yaml:/etc/promtail/config.yaml:ro
    ports:
      - "1514:1514"  # Syslog port exposed to the host
      - "9080:9080"
    command: -config.file=/etc/promtail/config.yaml
    networks:
      - monitoring-net
    restart: unless-stopped

volumes:
  loki_data:
    driver: local

networks:
  monitoring-net:
    external: true

View File

@@ -0,0 +1,35 @@
# Single-instance Loki: filesystem storage, in-memory ring, TSDB index.
auth_enabled: false

server:
  http_listen_port: 3100
  grpc_listen_port: 9096

common:
  instance_addr: 127.0.0.1
  path_prefix: /loki
  replication_factor: 1
  ring:
    kvstore:
      store: inmemory
  storage:
    filesystem:
      chunks_directory: /loki/chunks
      rules_directory: /loki/rules

schema_config:
  configs:
    - from: 2020-10-24
      schema: v13
      store: tsdb
      object_store: filesystem
      index:
        prefix: index_
        period: 24h

compactor:
  working_directory: /loki/boltdb-shipper-compactor
  retention_enabled: true
  # Set alongside retention_enabled; per the original author's note, adding
  # this key resolved a Loki error.
  delete_request_store: filesystem

limits_config:
  # 336h = 14 days
  retention_period: 336h

View File

@@ -0,0 +1,22 @@
# Promtail: accept syslog over TCP on port 1514 and push entries to Loki.
server:
  http_listen_port: 9080
  grpc_listen_port: 0

positions:
  filename: /tmp/positions.yaml

clients:
  - url: http://loki:3100/loki/api/v1/push

scrape_configs:
  - job_name: syslog_ingest
    syslog:
      listen_address: 0.0.0.0:1514
      listen_protocol: tcp  # We only listen on TCP now
      idle_timeout: 60s
      # Canonical boolean; `yes` is only truthy under YAML 1.1 parsers.
      label_structured_data: true
      labels:
        job: "syslog_combined"  # One job for both Proxmox and UniFi
    relabel_configs:
      # Expose the sender's syslog hostname as the `host` label.
      - source_labels: ['__syslog_message_hostname']
        target_label: 'host'