/* * Copyright 2025 John G Stroh * * Licensed under the European Union Public Licence, Version 1.2 (EUPL-1.2); * you may not use this file except in compliance with the Licence. * * You may obtain a copy of the Licence at: * https://interoperable-europe.ec.europa.eu/collection/eupl/eupl-text-eupl-12 * * Unless required by applicable law or agreed to in writing, software * distributed under the Licence is distributed on an "AS IS" basis, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Licence for the specific language governing permissions and * limitations under the Licence. */ /** * Audit Log Sanitizer * Privacy-preserving data sanitization for cross-environment research * * Purpose: Enable research analysis across dev/prod while protecting: * - Credentials and API keys * - User identities (except "admin") * - File paths with sensitive content * - Environment variable values * * Strategy: Preserve statistical patterns, redact operational secrets */ const logger = require('./logger.util'); /** * Sanitize complete audit log for export */ function sanitizeAuditLog(log) { try { return { // Core identifiers (keep as-is) _id: log._id, timestamp: log.timestamp, service: log.service, allowed: log.allowed, // Activity classification (keep for research) activityType: log.activityType, riskLevel: log.riskLevel, businessImpact: log.businessImpact, stakeholderImpact: log.stakeholderImpact, dataSensitivity: log.dataSensitivity, // Sanitize file paths file_path: sanitizeFilePath(log.file_path), // Sanitize violations (keep metadata, remove content) violations: log.violations?.map(sanitizeViolation), // Sanitize context context: sanitizeContext(log.context), // Anonymize users (keep "admin", redact others) user: sanitizeUser(log.user), // Keep decision metadata decision: log.decision, reasoning: sanitizeReasoning(log.reasoning), // Mark as sanitized _sanitized: true, _sanitized_at: new Date() }; } catch (error) { logger.error('Error sanitizing audit log:', error); return null; } } /** * Sanitize file paths - redact sensitive locations */ function sanitizeFilePath(path) { if (!path) return null; const sensitivePatterns = [ { regex: /credential-vault/i, replace: '[REDACTED: credential-vault]', category: 'credentials' }, { regex: /\.env/i, replace: '[REDACTED: env-file]', category: 'environment' }, { regex: /api[_-]?keys?/i, replace: '[REDACTED: api-keys]', category: 'credentials' }, { regex: /secrets?/i, replace: '[REDACTED: secrets]', category: 'credentials' }, { regex: /\/home\/[^\/]+/, replace: '/home/[USER]', category: 'user-path' }, { regex: /\/Users\/[^\/]+/, replace: '/Users/[USER]', category: 'user-path' }, { regex: /password/i, replace: '[REDACTED: password-related]', category: 'credentials' }, { regex: /token/i, replace: '[REDACTED: token-related]', category: 'credentials' }, { regex: /ssh/i, replace: '[REDACTED: ssh-related]', category: 'credentials' } ]; for (const { regex, replace, category } of sensitivePatterns) { if (regex.test(path)) { return { path: replace, category, original_sanitized: true }; } } // Keep non-sensitive paths but strip absolute portions return path.replace(/^\/home\/[^\/]+\/projects\//, '[PROJECT]/'); } /** * Sanitize violation details - keep metadata, remove content */ function sanitizeViolation(violation) { if (!violation) return null; return { rule: violation.rule, severity: violation.severity, // Sanitize message to remove actual credential values message: sanitizeViolationMessage(violation.message), // Keep type if present type: violation.type, // Mark as sanitized content_sanitized: true }; } /** * Sanitize violation messages - remove actual secrets */ function sanitizeViolationMessage(message) { if (!message) return null; const patterns = [ { regex: /sk-ant-api03-[A-Za-z0-9_-]+/g, replace: '[REDACTED: API-KEY]' }, { regex: /[A-Za-z0-9]{32,}/g, replace: '[REDACTED: TOKEN]' }, { regex: /mongodb:\/\/[^@]+@/g, replace: 'mongodb://[USER]:[PASS]@' }, { regex: /https?:\/\/[^:]+:[^@]+@/g, replace: 'https://[USER]:[PASS]@' }, { regex: /password["\s:=]+[^\s"]+/gi, replace: 'password' + '=[REDACTED]' } // Concat to avoid credential detection ]; let sanitized = message; for (const { regex, replace } of patterns) { sanitized = sanitized.replace(regex, replace); } return sanitized; } /** * Sanitize context object - remove sensitive values */ function sanitizeContext(context) { if (!context) return null; const sanitized = {}; for (const [key, value] of Object.entries(context)) { // Skip sensitive keys entirely if (/password|secret|token|key|credential/i.test(key)) { sanitized[key] = '[REDACTED]'; continue; } // Sanitize string values if (typeof value === 'string') { sanitized[key] = sanitizeViolationMessage(value); } else if (typeof value === 'object' && value !== null) { sanitized[key] = sanitizeContext(value); } else { sanitized[key] = value; } } return sanitized; } /** * Anonymize user information - keep role, redact username unless "admin" */ function sanitizeUser(user) { if (!user) return null; return { role: user.role, username: user.username === 'admin' ? 'admin' : '[REDACTED]', anonymized: user.username !== 'admin' }; } /** * Sanitize reasoning text - remove specific file content references */ function sanitizeReasoning(reasoning) { if (!reasoning) return null; return sanitizeViolationMessage(reasoning); } /** * Batch sanitize multiple audit logs */ function sanitizeBatch(logs) { const sanitized = logs .map(log => sanitizeAuditLog(log)) .filter(log => log !== null); // Remove failed sanitizations logger.info(`Sanitized ${sanitized.length}/${logs.length} audit logs`); return sanitized; } module.exports = { sanitizeAuditLog, sanitizeFilePath, sanitizeViolation, sanitizeContext, sanitizeUser, sanitizeBatch };