tractatus/scripts/monitoring/log-monitor.sh
TheFlow 2298d36bed fix(submissions): restructure Economist package and fix article display
- Create Economist SubmissionTracking package correctly:
  * mainArticle = full blog post content
  * coverLetter = 216-word SIR— letter
  * Links to blog post via blogPostId
- Archive 'Letter to The Economist' from blog posts (it's the cover letter)
- Fix date display on article cards (use published_at)
- Target publication already displaying via blue badge

Database changes:
- Make blogPostId optional in SubmissionTracking model
- Economist package ID: 68fa85ae49d4900e7f2ecd83
- Le Monde package ID: 68fa2abd2e6acd5691932150

Next: Enhanced modal with tabs, validation, export

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-24 08:47:42 +13:00

278 lines
7.1 KiB
Bash
Executable file

#!/bin/bash
#
# Log Monitoring Script
# Monitors Tractatus service logs for errors, security events, and anomalies
#
# Usage:
# ./log-monitor.sh # Monitor the last 5 minutes of logs (default window)
# ./log-monitor.sh --since "1 hour ago" # Monitor specific time window (journalctl --since syntax)
# ./log-monitor.sh --follow # Continuous monitoring
# ./log-monitor.sh --test # Test mode (no alerts)
#
# Exit codes:
# 0 = No issues found
# 1 = Errors detected
# 2 = Critical errors detected
# 3 = Configuration error
set -euo pipefail

# Configuration
# SERVICE_NAME and ALERT_EMAIL can be overridden from the environment.
SERVICE_NAME="${SERVICE_NAME:-tractatus}"
ALERT_EMAIL="${ALERT_EMAIL:-}"
LOG_FILE="/var/log/tractatus/log-monitor.log"
# NOTE(review): STATE_FILE is not read or written anywhere in the visible
# script -- confirm whether "since last check" tracking is still planned.
STATE_FILE="/var/tmp/tractatus-log-monitor-state"
ERROR_THRESHOLD=10    # Alert after 10 errors in window
CRITICAL_THRESHOLD=3  # Alert immediately after 3 critical errors

# Defaults for the command-line options (overwritten by parse_args)
SINCE="5 minutes ago"
FOLLOW=false
TEST_MODE=false

#######################################
# Parse command-line options.
# Globals:   SINCE, FOLLOW, TEST_MODE (written)
# Arguments: the script's command-line arguments
# Outputs:   a diagnostic on stderr for an invalid command line
# Returns:   0 on success; exits the script with status 3 on an unknown
#            option or when --since is given without a value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --since)
        # Without this guard a bare "--since" trips `set -u` on "$2" and the
        # script dies with status 1 instead of the documented 3.
        if [[ -z "${2:-}" ]]; then
          echo "Option --since requires a value (e.g. --since \"1 hour ago\")" >&2
          exit 3
        fi
        SINCE="$2"
        shift 2
        ;;
      --follow)
        FOLLOW=true
        shift
        ;;
      --test)
        TEST_MODE=true
        shift
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 3
        ;;
    esac
  done
}

parse_args "$@"
#######################################
# Emit one timestamped log record.
# Globals:   LOG_FILE (read)
# Arguments: $1 - severity label (e.g. INFO, WARN, ERROR)
#            remaining arguments - message text, joined into one string
# Outputs:   "[YYYY-MM-DD HH:MM:SS] [level] message" on stdout; the same
#            record is appended to LOG_FILE when its directory exists
#######################################
log() {
  local severity="$1"
  shift
  local text="$*"
  local stamp=$(date '+%Y-%m-%d %H:%M:%S')
  local record="[$stamp] [$severity] $text"

  printf '%s\n' "$record"

  # Skip the file copy when the log directory has not been provisioned
  [[ -d "$(dirname "$LOG_FILE")" ]] || return 0
  printf '%s\n' "$record" >> "$LOG_FILE"
}
#######################################
# Send an alert email (best effort).
# Globals:   TEST_MODE, ALERT_EMAIL (read)
# Arguments: $1 - email subject
#            $2 - email body
# Outputs:   progress/diagnostic records via log
# Returns:   0 when the alert was sent, skipped (test mode, no recipient,
#            no mail program) or when delivery failed; a delivery failure is
#            logged as ERROR rather than propagated, so that under `set -e`
#            it cannot replace the monitor's own exit code or terminate
#            follow mode.
#######################################
send_alert() {
  local subject="$1"
  local body="$2"

  if [[ "$TEST_MODE" == "true" ]]; then
    log "INFO" "TEST MODE: Would send alert: $subject"
    return 0
  fi

  if [[ -z "$ALERT_EMAIL" ]]; then
    log "WARN" "No alert email configured (ALERT_EMAIL not set)"
    return 0
  fi

  if command -v mail &> /dev/null; then
    # Running the pipeline as an `if` condition keeps a failing mail command
    # from aborting the script under `set -e` / `pipefail`.
    if printf '%s\n' "$body" | mail -s "$subject" "$ALERT_EMAIL"; then
      log "INFO" "Alert email sent to $ALERT_EMAIL"
    else
      log "ERROR" "Failed to send alert email to $ALERT_EMAIL via mail"
    fi
  elif command -v sendmail &> /dev/null; then
    # Headers, a blank separator line, then the body.
    if printf 'Subject: %s\nFrom: %s\nTo: %s\n\n%s\n' \
      "$subject" \
      "tractatus-monitoring@agenticgovernance.digital" \
      "$ALERT_EMAIL" \
      "$body" | sendmail "$ALERT_EMAIL"; then
      log "INFO" "Alert email sent via sendmail to $ALERT_EMAIL"
    else
      log "ERROR" "Failed to send alert email to $ALERT_EMAIL via sendmail"
    fi
  else
    log "WARN" "No email command available"
  fi
}
#######################################
# Print the service's journal entries for a time window.
# Despite the name, no filtering is applied here: every entry returned by
# journalctl for the unit is written out.
# Globals:   SERVICE_NAME (read)
# Arguments: $1 - value passed to `journalctl --since`
# Outputs:   journal entries on stdout; a failure record on stderr
# Returns:   0 on success, 1 when journalctl (or sudo) fails
#######################################
extract_errors() {
  local since="$1"

  # stderr of sudo/journalctl is discarded, as before; only the exit status
  # is used to detect failure.
  if ! sudo journalctl -u "$SERVICE_NAME" --since "$since" --no-pager 2>/dev/null; then
    # This function's stdout is captured by the caller via $(...), so the
    # failure record must go to stderr or it would be swallowed into the
    # captured "logs" and never shown.
    log "ERROR" "Failed to read journal for $SERVICE_NAME" >&2
    return 1
  fi
}
#######################################
# Count the lines of a text that match a pattern (case-insensitive ERE).
# Arguments: $1 - extended regular expression
#            $2 - text to search
# Outputs:   the number of matching lines on stdout (0 when none match)
#######################################
_count_log_matches() {
  local pattern="$1"
  local text="$2"
  local count

  # grep -c prints 0 but exits 1 when nothing matches; `|| true` keeps that
  # from being treated as a failure under `set -e` / `pipefail`.
  count=$(grep -ciE -- "$pattern" <<<"$text" || true)
  printf '%s\n' "${count:-0}"
}

#######################################
# Analyze log patterns and classify the overall severity.
# Globals:   CRITICAL_THRESHOLD, ERROR_THRESHOLD (read)
# Arguments: $1 - log text to analyze (one entry per line)
# Outputs:   a summary record plus warning/threshold records via log
# Returns:   2 when the [CRITICAL] line count reaches CRITICAL_THRESHOLD,
#            1 when the [ERROR] line count reaches ERROR_THRESHOLD,
#            0 otherwise
#######################################
analyze_logs() {
  local logs="$1"
  local error_count critical_count warn_count security_count
  local db_error_count http_error_count exception_count

  # Severity markers
  error_count=$(_count_log_matches '\[ERROR\]' "$logs")
  critical_count=$(_count_log_matches '\[CRITICAL\]' "$logs")
  warn_count=$(_count_log_matches '\[WARN\]' "$logs")
  # Security-related patterns
  security_count=$(_count_log_matches '(SECURITY|unauthorized|forbidden|authentication failed)' "$logs")
  # Database-related lines
  db_error_count=$(_count_log_matches '(mongodb|database|connection.*failed)' "$logs")
  # HTTP errors
  http_error_count=$(_count_log_matches 'HTTP.*50[0-9]|Internal Server Error' "$logs")
  # Unhandled exceptions
  exception_count=$(_count_log_matches '(Unhandled.*exception|TypeError|ReferenceError)' "$logs")

  log "INFO" "Log analysis: CRITICAL=$critical_count ERROR=$error_count WARN=$warn_count SECURITY=$security_count DB_ERROR=$db_error_count HTTP_ERROR=$http_error_count EXCEPTION=$exception_count"

  # Emit the category warnings BEFORE the threshold checks: the early
  # returns below would otherwise suppress them exactly when the error
  # volume is highest.
  if [[ "$security_count" -gt 0 ]]; then
    log "WARN" "Security events detected: $security_count events"
  fi
  if [[ "$db_error_count" -gt 5 ]]; then
    log "WARN" "Database errors detected: $db_error_count errors"
  fi
  if [[ "$exception_count" -gt 0 ]]; then
    log "WARN" "Unhandled exceptions detected: $exception_count exceptions"
  fi

  # Determine severity (critical takes precedence over error)
  if [[ "$critical_count" -ge "$CRITICAL_THRESHOLD" ]]; then
    log "CRITICAL" "Critical error threshold exceeded: $critical_count critical errors"
    return 2
  fi
  if [[ "$error_count" -ge "$ERROR_THRESHOLD" ]]; then
    log "ERROR" "Error threshold exceeded: $error_count errors"
    return 1
  fi

  return 0
}
#######################################
# Rank the most frequent error messages.
# Arguments: $1 - log text (one entry per line)
#            $2 - maximum number of rows to print (default: 10)
# Outputs:   "count message" rows on stdout, most frequent first; only lines
#            containing [ERROR] or [CRITICAL] (any case) are considered, and
#            everything up to the last "] " on each line is removed before
#            counting
#######################################
get_top_errors() {
  local journal_text="$1"
  local max_rows="${2:-10}"

  grep -iE "\[ERROR\]|\[CRITICAL\]" <<<"$journal_text" \
    | sed -e 's/^.*\] //' \
    | sort \
    | uniq -c \
    | sort -rn \
    | head -n "$max_rows"
}
#######################################
# One-shot monitoring run: fetch the journal for the SINCE window, analyze
# it, and send an alert when analyze_logs reports a problem.
# Globals:   SINCE, SERVICE_NAME (read)
# Arguments: none
# Outputs:   progress records via log; an alert via send_alert
# Exits:     0 when no logs were found or no issue was detected,
#            with the non-zero status of analyze_logs otherwise,
#            3 when the journal could not be read
#######################################
main() {
  log "INFO" "Starting log monitoring (since: $SINCE)"

  # Extract logs
  local logs
  logs=$(extract_errors "$SINCE") || {
    log "ERROR" "Failed to extract logs"
    exit 3
  }

  # Count total log entries. Piping an empty string through `echo | wc -l`
  # reports 1 line, which made the "no logs" branch unreachable; test for
  # emptiness explicitly instead.
  local log_count=0
  if [[ -n "$logs" ]]; then
    log_count=$(grep -c '' <<<"$logs")
  fi
  log "INFO" "Analyzing $log_count log entries"

  if [[ "$log_count" -eq 0 ]]; then
    log "INFO" "No logs found in time window"
    exit 0
  fi

  # Analyze logs; capture the status without tripping `set -e`
  local exit_code=0
  analyze_logs "$logs" || exit_code=$?

  # If errors detected, send alert
  if [[ "$exit_code" -ne 0 ]]; then
    local severity="ERROR"
    if [[ "$exit_code" -eq 2 ]]; then
      severity="CRITICAL"
    fi
    local subject="[ALERT] Tractatus Log Monitoring - $severity Detected"

    # Both pipelines exit non-zero under `pipefail` when grep matches
    # nothing; that must not abort the run, hence `|| true`.
    local top_errors recent_errors
    top_errors=$(get_top_errors "$logs" 10) || true
    recent_errors=$(grep -iE "\[ERROR\]|\[CRITICAL\]" <<<"$logs" | tail -n 20) || true

    local body="Log monitoring detected $severity level issues in Tractatus service.
Time Window: $SINCE
Time: $(date '+%Y-%m-%d %H:%M:%S %Z')
Host: $(hostname)
Service: $SERVICE_NAME
Top Error Messages:
$top_errors
Recent Critical/Error Logs:
$recent_errors
Full logs:
sudo journalctl -u $SERVICE_NAME --since \"$SINCE\"
Check service status:
sudo systemctl status $SERVICE_NAME
"
    send_alert "$subject" "$body"
  else
    log "INFO" "No significant issues detected"
  fi

  exit "$exit_code"
}
#######################################
# Follow mode: stream the service journal and react to each new line.
# Lines containing [ERROR] or [CRITICAL] (any case) are logged; of those,
# lines that also match a critical pattern trigger an alert email.
# Globals:   SERVICE_NAME (read)
# Arguments: none
# Outputs:   log records via log; alerts via send_alert
# Returns:   the status of the journalctl | while pipeline
#######################################
follow_logs() {
  log "INFO" "Starting continuous log monitoring"

  local line
  # IFS= keeps leading/trailing whitespace of each journal line intact.
  sudo journalctl -u "$SERVICE_NAME" -f --no-pager | while IFS= read -r line; do
    # Ignore everything that is not an error-level entry
    grep -qiE "\[ERROR\]|\[CRITICAL\]" <<<"$line" || continue
    log "ERROR" "$line"

    # Only critical patterns escalate to an alert
    grep -qiE "\[CRITICAL\]|Unhandled.*exception|Database.*failed|Service.*crashed" <<<"$line" \
      || continue

    # Context for the alert; a failure to fetch it must not stop monitoring
    local recent
    recent=$(sudo journalctl -u "$SERVICE_NAME" -n 10 --no-pager) || true

    local subject="[CRITICAL] Tractatus Error Detected"
    local body="Critical error detected in Tractatus logs:
$line
Time: $(date '+%Y-%m-%d %H:%M:%S %Z')
Host: $(hostname)
Recent logs:
$recent
"
    send_alert "$subject" "$body"
  done
}
# Entry point: continuous follow mode when --follow was given, otherwise a
# single monitoring pass.
case "$FOLLOW" in
  true)
    follow_logs
    ;;
  *)
    main
    ;;
esac