diff --git a/deployment-quickstart/UPTIME_MONITORING_SETUP.md b/deployment-quickstart/UPTIME_MONITORING_SETUP.md new file mode 100644 index 00000000..3f2bf149 --- /dev/null +++ b/deployment-quickstart/UPTIME_MONITORING_SETUP.md @@ -0,0 +1,186 @@ +# External Uptime Monitoring Setup Guide + +This guide explains how to set up external uptime monitoring for the Tractatus Umami Analytics instance. + +## Monitored Endpoints + +### Primary Monitoring Target +- **URL**: `https://analytics.agenticgovernance.digital/api/heartbeat` +- **Expected Response**: HTTP 200 OK +- **Purpose**: Umami application health check + +### Secondary Monitoring Targets (Optional) +- **URL**: `https://agenticgovernance.digital/` +- **Expected Response**: HTTP 200 OK +- **Purpose**: Main website availability + +## Recommended Service: UptimeRobot (Free Tier) + +UptimeRobot provides free uptime monitoring with: +- 50 monitors +- 5-minute check intervals +- Email/SMS alerts +- Status page generation + +### Setup Instructions + +#### 1. Create Account +1. Visit https://uptimerobot.com +2. Sign up for a free account +3. Verify your email address + +#### 2. Add Analytics Monitor +1. Click "Add New Monitor" +2. Configure: + - **Monitor Type**: HTTP(s) + - **Friendly Name**: `Tractatus Analytics (Umami)` + - **URL**: `https://analytics.agenticgovernance.digital/api/heartbeat` + - **Monitoring Interval**: 5 minutes + - **Monitor Timeout**: 30 seconds + - **HTTP Method**: GET + - **Expected Status Code**: 200 + +3. Click "Create Monitor" + +#### 3. Add Main Website Monitor (Optional) +1. Click "Add New Monitor" +2. Configure: + - **Monitor Type**: HTTP(s) + - **Friendly Name**: `Tractatus Website` + - **URL**: `https://agenticgovernance.digital/` + - **Monitoring Interval**: 5 minutes + - **Monitor Timeout**: 30 seconds + +3. Click "Create Monitor" + +#### 4. Configure Alert Contacts +1. Go to "My Settings" → "Alert Contacts" +2. Add email address for alerts +3. 
(Optional) Add SMS number for critical alerts +4. Configure alert preferences: + - **Alert When**: Down + - **Alert After**: 2 consecutive failures (10 minutes) + - **Re-Alert After**: 30 minutes + +#### 5. Create Public Status Page (Optional) +1. Go to "Status Pages" +2. Click "Add Status Page" +3. Configure: + - **Title**: Tractatus Services Status + - **Custom Domain**: (optional) status.agenticgovernance.digital + - **Monitors**: Select both monitors +4. Enable "Show Uptime Percentage" +5. Enable "Show Response Times" + +## Alternative Services + +### Pingdom +- **Free Tier**: 1 monitor +- **Check Interval**: 1 minute +- **URL**: https://www.pingdom.com + +### Better Uptime +- **Free Tier**: 10 monitors +- **Check Interval**: 3 minutes +- **URL**: https://betteruptime.com + +### StatusCake +- **Free Tier**: 10 monitors +- **Check Interval**: 5 minutes +- **URL**: https://www.statuscake.com + +## Internal Monitoring (Already Configured) + +The following internal monitoring is already set up: + +### Docker Health Checks +- **Umami Container**: `curl -f http://localhost:3000/api/heartbeat` + - Interval: 10 seconds + - Timeout: 5 seconds + - Retries: 5 + +- **PostgreSQL Container**: `pg_isready -U $POSTGRES_USER -d $POSTGRES_DB` + - Interval: 5 seconds + - Timeout: 5 seconds + - Retries: 5 + +### Automated Backups +- **Schedule**: Daily at 2:00 AM +- **Retention**: 7 days +- **Location**: `~/umami-backups/` +- **Script**: `~/umami-deployment/backup-umami-db.sh` + +### Disk Usage Monitoring +- **Schedule**: Daily at 3:00 AM +- **Warning Threshold**: 80% disk usage +- **Critical Threshold**: 90% disk usage +- **Location**: `~/umami-backups/disk-monitoring.log` +- **Script**: `~/umami-deployment/monitor-disk-usage.sh` + +## Verification + +To verify monitoring is working: + +1. **Check Endpoint Manually**: +```bash +curl -I https://analytics.agenticgovernance.digital/api/heartbeat +# Should return: HTTP/2 200 +``` + +2. 
**Test Alert Flow**: + - Stop Umami container: `docker stop tractatus-umami` + - Wait for alert (should arrive within 10 minutes) + - Restart container: `docker start tractatus-umami` + - Verify recovery alert + +3. **Check Internal Monitoring**: +```bash +# View Docker health status +docker ps + +# Check backup logs +tail -20 ~/umami-backups/backup.log + +# Check disk monitoring logs +tail -20 ~/umami-backups/disk-monitoring.log +``` + +## Alert Response Procedures + +### Analytics Down (5+ minutes) +1. Check Docker container status: `docker ps` +2. Check container logs: `docker logs tractatus-umami` +3. Check PostgreSQL status: `docker logs tractatus-umami-db` +4. If needed, restart: `cd ~/umami-deployment && docker compose restart` + +### High Disk Usage (>80%) +1. Check backup retention: `ls -lh ~/umami-backups/` +2. Remove old backups manually if needed +3. Check PostgreSQL volume: `docker exec tractatus-umami-db du -sh /var/lib/postgresql/data` +4. Consider database cleanup or server upgrade + +### Database Corruption +1. Stop Umami (run from the compose directory): `cd ~/umami-deployment && docker compose stop umami` +2. Restore from backup: `~/umami-deployment/restore-umami-db.sh ~/umami-backups/umami_backup_YYYYMMDD_HHMMSS.sql.gz` +3. 
Restart services: `docker compose up -d` + +## Next Steps + +- [ ] Sign up for UptimeRobot +- [ ] Add analytics.agenticgovernance.digital monitor +- [ ] Configure email alerts +- [ ] Test alert delivery +- [ ] (Optional) Create public status page +- [ ] Document response procedures in team wiki + +## Maintenance + +- Review monitoring logs monthly +- Test restore procedure quarterly +- Update alert contacts when team changes +- Review disk usage trends monthly + +--- + +**Last Updated**: 2025-10-29 +**Monitoring Status**: Internal monitoring active, external monitoring pending user setup diff --git a/public/admin/disk-monitoring.html b/public/admin/disk-monitoring.html new file mode 100644 index 00000000..9ae9ed85 --- /dev/null +++ b/public/admin/disk-monitoring.html @@ -0,0 +1,95 @@ + + + + + + Disk Monitoring | Tractatus Admin + + + + + + + +
+ + + +
+ + +
+
+

Disk Monitoring

+

Real-time disk usage metrics for development and production systems

+
+ +
+ + +
+
+

Loading metrics...

+
+ + + + + + + +
+ + + + diff --git a/public/js/admin-disk-monitoring.js b/public/js/admin-disk-monitoring.js new file mode 100644 index 00000000..872d1222 --- /dev/null +++ b/public/js/admin-disk-monitoring.js @@ -0,0 +1,213 @@ +// Disk Monitoring - Admin UI +// CSP-compliant implementation using DOM manipulation + +async function loadMetrics() { + const loading = document.getElementById('loading'); + const metricsContainer = document.getElementById('metrics-container'); + const errorDiv = document.getElementById('error'); + + try { + loading.classList.remove('hidden'); + metricsContainer.classList.add('hidden'); + errorDiv.classList.add('hidden'); + + const token = localStorage.getItem('token'); + const response = await fetch('/api/admin/disk-metrics', { + headers: { 'Authorization': 'Bearer ' + token } + }); + + if (!response.ok) { + throw new Error('Failed to fetch metrics: ' + response.status); + } + + const result = await response.json(); + + if (!result.success) { + throw new Error(result.error || 'Unknown error'); + } + + renderMetrics(result.data); + + loading.classList.add('hidden'); + metricsContainer.classList.remove('hidden'); + + } catch (err) { + console.error('Load metrics error:', err); + loading.classList.add('hidden'); + errorDiv.classList.remove('hidden'); + + const errorMessage = document.getElementById('error-message'); + errorMessage.textContent = err.message || 'An unexpected error occurred'; + } +} + +function renderMetrics(data) { + const localContainer = document.getElementById('local-metrics'); + const remoteContainer = document.getElementById('remote-metrics'); + + // Clear existing content + localContainer.textContent = ''; + remoteContainer.textContent = ''; + + // Render local metrics + if (data.local) { + renderSystemMetrics(localContainer, data.local, 'Local Development'); + } else { + renderError(localContainer, 'Local metrics unavailable'); + } + + // Render remote metrics + if (data.remote) { + renderSystemMetrics(remoteContainer, data.remote, 
'Production VPS'); + } else { + renderError(remoteContainer, 'Remote metrics unavailable'); + } +} + +function renderSystemMetrics(container, metrics, label) { + // Disk Usage Card + const diskCard = createMetricCard( + 'Disk Usage', + metrics.health, + [ + { label: 'Total', value: metrics.total }, + { label: 'Used', value: metrics.used }, + { label: 'Available', value: metrics.available }, + { label: 'Usage', value: metrics.usedPercent + '%', progress: metrics.usedPercent } + ] + ); + container.appendChild(diskCard); + + // Memory Card + if (metrics.memory) { + const memoryCard = createMetricCard( + 'Memory', + { level: metrics.memory.usedPercent >= 90 ? 'critical' : metrics.memory.usedPercent >= 80 ? 'warning' : 'healthy' }, + [ + { label: 'Total', value: metrics.memory.total }, + { label: 'Used', value: metrics.memory.usedPercent + '%', progress: metrics.memory.usedPercent } + ] + ); + container.appendChild(memoryCard); + } + + // System Info Card + const sysCard = createMetricCard( + 'System Info', + null, + [ + { label: 'Hostname', value: metrics.hostname || 'Unknown' }, + { label: 'Platform', value: metrics.platform || 'Unknown' }, + { label: 'Uptime', value: (metrics.uptime || 0) + ' hours' } + ] + ); + container.appendChild(sysCard); + + // Docker Volumes (if present) + if (metrics.docker) { + const dockerCard = createMetricCard( + 'Docker Volumes', + null, + [ + { label: 'Total', value: metrics.docker.total }, + { label: 'Used', value: metrics.docker.used } + ] + ); + container.appendChild(dockerCard); + } +} + +function createMetricCard(title, health, items) { + const card = document.createElement('div'); + card.className = 'metric-card bg-white rounded-lg shadow-lg p-6'; + + // Card header + const header = document.createElement('div'); + header.className = 'flex items-center justify-between mb-4'; + + const titleEl = document.createElement('h3'); + titleEl.className = 'text-lg font-semibold text-gray-900'; + titleEl.textContent = title; + 
header.appendChild(titleEl); + + // Health indicator (if present) + if (health) { + const indicator = document.createElement('span'); + indicator.className = 'health-indicator health-' + health.level; + indicator.title = health.level.charAt(0).toUpperCase() + health.level.slice(1); + header.appendChild(indicator); + } + + card.appendChild(header); + + // Card content + items.forEach(item => { + const row = document.createElement('div'); + row.className = 'mb-3'; + + const labelDiv = document.createElement('div'); + labelDiv.className = 'flex justify-between text-sm mb-1'; + + const labelSpan = document.createElement('span'); + labelSpan.className = 'text-gray-600'; + labelSpan.textContent = item.label; + labelDiv.appendChild(labelSpan); + + const valueSpan = document.createElement('span'); + valueSpan.className = 'font-semibold text-gray-900'; + valueSpan.textContent = item.value; + labelDiv.appendChild(valueSpan); + + row.appendChild(labelDiv); + + // Progress bar (if present) + if (item.progress !== undefined) { + const progressBg = document.createElement('div'); + progressBg.className = 'w-full bg-gray-200 rounded-full h-2'; + + const progressBar = document.createElement('div'); + progressBar.className = 'progress-bar h-2 rounded-full ' + getProgressColor(item.progress); + progressBar.style.width = item.progress + '%'; + + progressBg.appendChild(progressBar); + row.appendChild(progressBg); + } + + card.appendChild(row); + }); + + return card; +} + +function renderError(container, message) { + const errorDiv = document.createElement('div'); + errorDiv.className = 'col-span-3 bg-yellow-50 border-l-4 border-yellow-500 p-4 rounded'; + + const errorText = document.createElement('p'); + errorText.className = 'text-yellow-800'; + errorText.textContent = '⚠️ ' + message; + + errorDiv.appendChild(errorText); + container.appendChild(errorDiv); +} + +function getProgressColor(percent) { + if (percent >= 90) return 'bg-red-600'; + if (percent >= 80) return 'bg-orange-500'; 
+ if (percent >= 70) return 'bg-yellow-500'; + return 'bg-green-600'; +} + +// Refresh functionality +document.addEventListener('DOMContentLoaded', () => { + loadMetrics(); + + // Refresh button + const refreshBtn = document.getElementById('refresh-btn'); + if (refreshBtn) { + refreshBtn.addEventListener('click', loadMetrics); + } + + // Auto-refresh every 5 minutes + setInterval(loadMetrics, 5 * 60 * 1000); +}); diff --git a/public/js/components/navbar-admin.js b/public/js/components/navbar-admin.js index 246fcf4a..b162f689 100644 --- a/public/js/components/navbar-admin.js +++ b/public/js/components/navbar-admin.js @@ -31,7 +31,8 @@ calendar: '', dashboard: '', blog: '', - analytics: '' + analytics: '', + server: '' }; return icons[iconType] || icons.default; } @@ -154,6 +155,9 @@ 🔒 Hooks Dashboard + + 💾 Disk Monitoring + 📁 Project Manager diff --git a/src/controllers/diskMetrics.controller.js b/src/controllers/diskMetrics.controller.js new file mode 100644 index 00000000..d9e4ce7c --- /dev/null +++ b/src/controllers/diskMetrics.controller.js @@ -0,0 +1,99 @@ +/** + * Disk Metrics Controller + * Handles API requests for disk usage and system metrics + */ + +const DiskMetrics = require('../models/DiskMetrics.model'); +const logger = require('../utils/logger.util'); + +/** + * Get all metrics (local + remote) + * GET /api/admin/disk-metrics + */ +async function getAllMetrics(req, res) { + try { + logger.info('Fetching disk metrics for all systems'); + const metrics = await DiskMetrics.getAllMetrics(); + + // Add health status for each system + if (metrics.local && metrics.local.usedPercent !== undefined) { + metrics.local.health = DiskMetrics.getHealthStatus(metrics.local.usedPercent); + } + + if (metrics.remote && metrics.remote.usedPercent !== undefined) { + metrics.remote.health = DiskMetrics.getHealthStatus(metrics.remote.usedPercent); + } + + res.json({ + success: true, + data: metrics + }); + } catch (error) { + logger.error('Get all metrics error:', error); + 
res.status(500).json({ + success: false, + error: 'Failed to fetch disk metrics', + message: error.message + }); + } +} + +/** + * Get local metrics only + * GET /api/admin/disk-metrics/local + */ +async function getLocalMetrics(req, res) { + try { + logger.info('Fetching local disk metrics'); + const metrics = await DiskMetrics.getLocalMetrics(); + + if (metrics.usedPercent !== undefined) { + metrics.health = DiskMetrics.getHealthStatus(metrics.usedPercent); + } + + res.json({ + success: true, + data: metrics + }); + } catch (error) { + logger.error('Get local metrics error:', error); + res.status(500).json({ + success: false, + error: 'Failed to fetch local metrics', + message: error.message + }); + } +} + +/** + * Get remote metrics only + * GET /api/admin/disk-metrics/remote + */ +async function getRemoteMetrics(req, res) { + try { + logger.info('Fetching remote disk metrics'); + const metrics = await DiskMetrics.getRemoteMetrics(); + + if (metrics.usedPercent !== undefined) { + metrics.health = DiskMetrics.getHealthStatus(metrics.usedPercent); + } + + res.json({ + success: true, + data: metrics + }); + } catch (error) { + logger.error('Get remote metrics error:', error); + res.status(500).json({ + success: false, + error: 'Failed to fetch remote metrics', + message: error.message + }); + } +} + +module.exports = { + getAllMetrics, + getLocalMetrics, + getRemoteMetrics +}; diff --git a/src/models/DiskMetrics.model.js b/src/models/DiskMetrics.model.js new file mode 100644 index 00000000..fefbc358 --- /dev/null +++ b/src/models/DiskMetrics.model.js @@ -0,0 +1,150 @@ +/** + * Disk Metrics Model + * Collects and provides disk usage metrics for local and remote systems + */ + +const { exec } = require('child_process'); +const { promisify } = require('util'); +const execAsync = promisify(exec); +const os = require('os'); + +class DiskMetrics { + /** + * Get local system disk metrics + */ + static async getLocalMetrics() { + try { + const platform = os.platform(); + 
let diskCmd; + + if (platform === 'linux' || platform === 'darwin') { + diskCmd = "df -h / | tail -1 | awk '{print $2,$3,$4,$5}'"; + } else if (platform === 'win32') { + diskCmd = 'wmic logicaldisk get size,freespace,caption'; + } else { + throw new Error(`Unsupported platform: ${platform}`); + } + + const { stdout } = await execAsync(diskCmd); + const parts = stdout.trim().split(/\s+/); + + let metrics; + if (platform === 'linux' || platform === 'darwin') { + // Linux/Mac output: Total Used Available Use% + metrics = { + total: parts[0], + used: parts[1], + available: parts[2], + usedPercent: parseInt(parts[3].replace('%', '')) + }; + } + + // Add system info + metrics.hostname = os.hostname(); + metrics.platform = platform; + metrics.uptime = Math.floor(os.uptime() / 3600); // hours + metrics.timestamp = new Date().toISOString(); + + // Memory metrics + const totalMem = os.totalmem(); + const freeMem = os.freemem(); + metrics.memory = { + total: (totalMem / (1024 ** 3)).toFixed(2) + 'GB', + used: ((totalMem - freeMem) / (1024 ** 3)).toFixed(2) + 'GB', + free: (freeMem / (1024 ** 3)).toFixed(2) + 'GB', + usedPercent: Math.round(((totalMem - freeMem) / totalMem) * 100) + }; + + return metrics; + } catch (error) { + console.error('Error getting local metrics:', error); + return { + error: error.message, + hostname: os.hostname(), + timestamp: new Date().toISOString() + }; + } + } + + /** + * Get remote production server metrics via SSH + */ + static async getRemoteMetrics() { + try { + const sshKey = process.env.DEPLOY_SSH_KEY || '~/.ssh/tractatus_deploy'; + const remoteHost = process.env.DEPLOY_HOST || 'ubuntu@vps-93a693da.vps.ovh.net'; + + // Get disk metrics + const diskCmd = `ssh -i ${sshKey} ${remoteHost} "df -h / | tail -1 | awk '{print \\$2,\\$3,\\$4,\\$5}'"`; + const { stdout: diskOutput } = await execAsync(diskCmd); + const diskParts = diskOutput.trim().split(/\s+/); + + // Get memory metrics + const memCmd = `ssh -i ${sshKey} ${remoteHost} "free -h | grep 
Mem: | awk '{print \\$2,\\$3,\\$4}'"`; + const { stdout: memOutput } = await execAsync(memCmd); + const memParts = memOutput.trim().split(/\s+/); + + // Get uptime + const uptimeCmd = `ssh -i ${sshKey} ${remoteHost} "uptime -p"`; + const { stdout: uptimeOutput } = await execAsync(uptimeCmd); + + // Get Docker volumes (if available) + const dockerVolCmd = `ssh -i ${sshKey} ${remoteHost} "docker system df -v 2>/dev/null | grep umami_db_data | awk '{print \\$3}'" || echo "N/A"`; + const { stdout: dockerVolOutput } = await execAsync(dockerVolCmd); + + const metrics = { + hostname: remoteHost.split('@')[1] || remoteHost, + platform: 'linux', + timestamp: new Date().toISOString(), + total: diskParts[0], + used: diskParts[1], + available: diskParts[2], + usedPercent: parseInt(diskParts[3].replace('%', '')), + memory: { + total: memParts[0], + used: memParts[1], + free: memParts[2] + }, + uptime: uptimeOutput.trim(), + dockerVolume: dockerVolOutput.trim() + }; + + return metrics; + } catch (error) { + console.error('Error getting remote metrics:', error); + return { + error: error.message, + hostname: 'production-vps', + timestamp: new Date().toISOString() + }; + } + } + + /** + * Get metrics for both local and remote systems + */ + static async getAllMetrics() { + const [local, remote] = await Promise.allSettled([ + this.getLocalMetrics(), + this.getRemoteMetrics() + ]); + + return { + local: local.status === 'fulfilled' ? local.value : { error: local.reason?.message }, + remote: remote.status === 'fulfilled' ? 
remote.value : { error: remote.reason?.message }, + collectedAt: new Date().toISOString() + }; + } + + /** + * Get health status based on metrics + */ + static getHealthStatus(usedPercent) { + if (usedPercent >= 90) return { level: 'critical', color: 'red' }; + if (usedPercent >= 80) return { level: 'warning', color: 'yellow' }; + if (usedPercent >= 70) return { level: 'caution', color: 'orange' }; + return { level: 'healthy', color: 'green' }; + } +} + +module.exports = DiskMetrics; diff --git a/src/routes/diskMetrics.routes.js b/src/routes/diskMetrics.routes.js new file mode 100644 index 00000000..6c1be8f6 --- /dev/null +++ b/src/routes/diskMetrics.routes.js @@ -0,0 +1,34 @@ +/** + * Disk Metrics Routes + * Admin-only endpoints for system disk monitoring + */ + +const express = require('express'); +const router = express.Router(); +const diskMetricsController = require('../controllers/diskMetrics.controller'); +const { asyncHandler } = require('../middleware/error.middleware'); +const { authenticateToken, requireRole } = require('../middleware/auth.middleware'); + +// All routes require admin authentication +router.use(authenticateToken); +router.use(requireRole('admin')); + +/** + * GET /api/admin/disk-metrics + * Get metrics for all systems (local + remote) + */ +router.get('/', asyncHandler(diskMetricsController.getAllMetrics)); + +/** + * GET /api/admin/disk-metrics/local + * Get local system metrics only + */ +router.get('/local', asyncHandler(diskMetricsController.getLocalMetrics)); + +/** + * GET /api/admin/disk-metrics/remote + * Get remote production metrics only + */ +router.get('/remote', asyncHandler(diskMetricsController.getRemoteMetrics)); + +module.exports = router; diff --git a/src/routes/index.js b/src/routes/index.js index 8881ca7d..0ebcfe22 100644 --- a/src/routes/index.js +++ b/src/routes/index.js @@ -32,6 +32,7 @@ const inboxRoutes = require('./inbox.routes'); const crmRoutes = require('./crm.routes'); const missedBreachRoutes = 
require('./missedBreach.routes'); const researchRoutes = require('./research.routes'); +const diskMetricsRoutes = require('./diskMetrics.routes'); // Development/test routes (only in development) if (process.env.NODE_ENV !== 'production') { @@ -65,6 +66,7 @@ router.use('/inbox', inboxRoutes); router.use('/crm', crmRoutes); router.use('/admin/missed-breaches', missedBreachRoutes); router.use('/research-inquiry', researchRoutes); +router.use('/admin/disk-metrics', diskMetricsRoutes); // API root endpoint - redirect browsers to documentation router.get('/', (req, res) => {