tractatus/src/utils/audit-sanitizer.util.js
TheFlow 8ecd770fce feat(research): add cross-environment audit log sync infrastructure
Implements privacy-preserving synchronization of production audit logs
to development for comprehensive governance research analysis.

Backend Components:
- SyncMetadata.model.js: Track sync state and statistics
- audit-sanitizer.util.js: Privacy sanitization utility
  - Redacts credentials, API keys, user identities
  - Sanitizes file paths and violation content
  - Preserves statistical patterns for research
- sync-prod-audit-logs.js: CLI sync script
  - Incremental sync with deduplication
  - Dry-run mode for testing
  - Configurable date range
- AuditLog.model.js: Enhanced schema with environment tracking
  - environment field (development/production/staging)
  - sync_metadata tracking (original_id, synced_from, etc.)
  - New indexes for cross-environment queries
- audit.controller.js: New /api/admin/audit-export endpoint
  - Privacy-sanitized export for cross-environment sync
  - Environment filter support in getAuditLogs
- MemoryProxy.service.js: Environment tagging in auditDecision()
  - Tags new logs with NODE_ENV or override
  - Sets is_local flag for tracking

Frontend Components:
- audit-analytics.html: Environment filter dropdown
- audit-analytics.js: Environment filter query parameter handling

Research Benefits:
- Combine dev and prod governance statistics
- Longitudinal analysis across environments
- Validate framework consistency
- Privacy-preserving data sharing

Security:
- API-based export (not direct DB access)
- Admin-only endpoints with JWT authentication
- Comprehensive credential redaction
- One-way sync (production → development)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-27 12:11:16 +13:00

219 lines
6.1 KiB
JavaScript

/*
* Copyright 2025 John G Stroh
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Audit Log Sanitizer
* Privacy-preserving data sanitization for cross-environment research
*
* Purpose: Enable research analysis across dev/prod while protecting:
* - Credentials and API keys
* - User identities (except "admin")
* - File paths with sensitive content
* - Environment variable values
*
* Strategy: Preserve statistical patterns, redact operational secrets
*/
const logger = require('./logger.util');
/**
 * Build the export-safe copy of a single audit log entry.
 *
 * Identifier, classification and decision fields are copied through
 * untouched. The file path, violations, context, user and reasoning are each
 * passed through their dedicated sanitizer. The result is stamped with
 * `_sanitized: true` and the time of sanitization.
 *
 * @param {object} log - Raw audit log record.
 * @returns {object|null} Sanitized copy, or null when anything threw while
 *   building it (the error is reported through `logger.error`).
 */
function sanitizeAuditLog(log) {
  try {
    // Destructuring a null/undefined log throws here, inside the try, so it
    // is reported and turned into a null result like any other failure.
    const {
      _id,
      timestamp,
      service,
      allowed,
      activityType,
      riskLevel,
      businessImpact,
      stakeholderImpact,
      dataSensitivity,
      file_path: rawFilePath,
      violations: rawViolations,
      context: rawContext,
      user: rawUser,
      decision,
      reasoning: rawReasoning
    } = log;

    return {
      // Core identifiers, exported unchanged
      _id,
      timestamp,
      service,
      allowed,
      // Activity classification, exported unchanged
      activityType,
      riskLevel,
      businessImpact,
      stakeholderImpact,
      dataSensitivity,
      // Fields that go through a sanitizer
      file_path: sanitizeFilePath(rawFilePath),
      // Stays undefined (but present as a key) when the log has no violations
      violations: rawViolations == null
        ? undefined
        : rawViolations.map((entry) => sanitizeViolation(entry)),
      context: sanitizeContext(rawContext),
      user: sanitizeUser(rawUser),
      // Decision metadata
      decision,
      reasoning: sanitizeReasoning(rawReasoning),
      // Sanitization marker
      _sanitized: true,
      _sanitized_at: new Date()
    };
  } catch (error) {
    logger.error('Error sanitizing audit log:', error);
    return null;
  }
}
/**
 * Sanitize a file path for export.
 *
 * Outcomes, in order:
 *  1. Falsy input returns null.
 *  2. If no sensitive pattern matches, the path is returned unchanged.
 *  3. If the first matching pattern is a user-path rule and the path lives
 *     under `/home/<user>/projects/`, that prefix is collapsed to
 *     `[PROJECT]/` and the resulting string is returned, provided the
 *     collapsed string matches none of the sensitive patterns.
 *  4. Otherwise the whole path is replaced by a descriptor object for the
 *     first matching pattern (list order decides which one wins).
 *
 * Previously outcome 3 could never happen: every `/home/<user>/projects/...`
 * path also matches the `/home/<user>` rule, which returned first and made
 * the `[PROJECT]/` rewrite dead code.
 *
 * @param {string} path - Raw file path from the audit log.
 * @returns {null|string|{path: string, category: string, original_sanitized: boolean}}
 *   null, a (possibly rewritten) path string, or a redaction descriptor.
 */
function sanitizeFilePath(path) {
  if (!path) return null;

  const projectPrefix = /^\/home\/[^\/]+\/projects\//;
  const sensitivePatterns = [
    { regex: /credential-vault/i, replace: '[REDACTED: credential-vault]', category: 'credentials' },
    { regex: /\.env/i, replace: '[REDACTED: env-file]', category: 'environment' },
    { regex: /api[_-]?keys?/i, replace: '[REDACTED: api-keys]', category: 'credentials' },
    { regex: /secrets?/i, replace: '[REDACTED: secrets]', category: 'credentials' },
    { regex: /\/home\/[^\/]+/, replace: '/home/[USER]', category: 'user-path' },
    { regex: /\/Users\/[^\/]+/, replace: '/Users/[USER]', category: 'user-path' },
    { regex: /password/i, replace: '[REDACTED: password-related]', category: 'credentials' },
    { regex: /token/i, replace: '[REDACTED: token-related]', category: 'credentials' },
    { regex: /ssh/i, replace: '[REDACTED: ssh-related]', category: 'credentials' }
  ];

  const firstHit = sensitivePatterns.find(({ regex }) => regex.test(path));

  if (!firstHit) {
    // Nothing matched, so the path cannot start with /home/<user>/ and this
    // replace is a no-op for strings. It is kept as a string operation so a
    // non-string input still fails loudly instead of being exported as-is.
    return path.replace(projectPrefix, '[PROJECT]/');
  }

  if (firstHit.category === 'user-path' && typeof path === 'string') {
    const projectRelative = path.replace(projectPrefix, '[PROJECT]/');
    // Only hand back the project-relative form when removing the prefix
    // removed everything sensitive; re-checking the collapsed string catches
    // credentials or further user directories deeper in the path.
    const stillSensitive = sensitivePatterns.some(({ regex }) => regex.test(projectRelative));
    if (projectRelative !== path && !stillSensitive) {
      return projectRelative;
    }
  }

  return {
    path: firstHit.replace,
    category: firstHit.category,
    original_sanitized: true
  };
}
/**
 * Reduce a violation record to its exportable metadata.
 *
 * Only `rule`, `severity`, `type` and a scrubbed `message` survive; every
 * other property of the input is dropped. The result is flagged with
 * `content_sanitized: true`.
 *
 * @param {object} violation - Raw violation entry.
 * @returns {object|null} Sanitized violation, or null for falsy input.
 */
function sanitizeViolation(violation) {
  if (!violation) {
    return null;
  }

  const { rule, severity, message, type } = violation;

  return {
    rule,
    severity,
    // The message may embed real credential values, so it is scrubbed
    message: sanitizeViolationMessage(message),
    type,
    content_sanitized: true
  };
}
/**
 * Scrub secrets out of free-form text (violation messages, reasoning,
 * context strings).
 *
 * Redacted, in this order:
 *  1. MongoDB connection-string credentials (`mongodb://` and
 *     `mongodb+srv://`).
 *  2. `user:pass@` credentials in http/https URLs (the scheme is preserved).
 *  3. `password<sep>value` pairs.
 *  4. Anthropic-style API keys (`sk-ant-api03-...`).
 *  5. Any remaining run of 32 or more alphanumeric characters.
 *
 * The order is deliberate. The API-key and token placeholders contain a
 * space and a colon, so if they were inserted first the password rule would
 * re-match part of them (yielding `password=[REDACTED] TOKEN]`), and the
 * whitespace-bounded URL rules would no longer see the credential as one
 * piece. Structured rules therefore run before the generic ones.
 *
 * URL credentials are matched without crossing whitespace, so ordinary prose
 * that contains a URL followed later by a colon and an `@` is left intact.
 *
 * @param {string} message - Text to scrub.
 * @returns {string|null} Scrubbed text, or null for falsy input (including
 *   the empty string).
 */
function sanitizeViolationMessage(message) {
  if (!message) return null;

  const patterns = [
    { regex: /(mongodb(?:\+srv)?):\/\/[^\s@]+@/g, replace: '$1://[USER]:[PASS]@' },
    { regex: /(https?):\/\/[^\s:]+:[^\s@]+@/g, replace: '$1://[USER]:[PASS]@' },
    // Concatenated so source-level credential scanners do not flag this line
    { regex: /password["\s:=]+[^\s"]+/gi, replace: 'password' + '=[REDACTED]' },
    { regex: /sk-ant-api03-[A-Za-z0-9_-]+/g, replace: '[REDACTED: API-KEY]' },
    { regex: /[A-Za-z0-9]{32,}/g, replace: '[REDACTED: TOKEN]' }
  ];

  return patterns.reduce(
    (text, { regex, replace }) => text.replace(regex, replace),
    message
  );
}
/**
 * Recursively sanitize a context object.
 *
 * - A property whose key contains password/secret/token/key/credential
 *   (case-insensitive) has its value replaced by `'[REDACTED]'`.
 * - String values are scrubbed with `sanitizeViolationMessage`.
 * - Arrays stay arrays; each element is sanitized by the same rules.
 *   (Previously an array was rebuilt as a plain object keyed by index.)
 * - Date values are copied. (Previously a Date collapsed to `{}` because it
 *   has no own enumerable properties.)
 * - Other objects are sanitized recursively.
 * - Everything else (numbers, booleans, null, undefined) passes through.
 *
 * The result is built with `Object.fromEntries`, so a key literally named
 * `__proto__` becomes an ordinary own property rather than going through the
 * prototype setter.
 *
 * @param {object|Array} context - Raw context from the audit log.
 * @returns {object|Array|null} Sanitized copy, or null for falsy input.
 */
function sanitizeContext(context) {
  if (!context) return null;

  const sensitiveKey = /password|secret|token|key|credential/i;

  const sanitizeValue = (value) => {
    if (typeof value === 'string') return sanitizeViolationMessage(value);
    if (Array.isArray(value)) return value.map((item) => sanitizeValue(item));
    if (value instanceof Date) return new Date(value.getTime());
    if (typeof value === 'object' && value !== null) return sanitizeContext(value);
    return value;
  };

  if (Array.isArray(context)) {
    return context.map((item) => sanitizeValue(item));
  }

  return Object.fromEntries(
    Object.entries(context).map(([key, value]) => [
      key,
      // Sensitive keys keep their name but never their value
      sensitiveKey.test(key) ? '[REDACTED]' : sanitizeValue(value)
    ])
  );
}
/**
 * Anonymize the user attached to an audit log.
 *
 * The role is kept. The username is kept only when it is exactly "admin";
 * any other username (including a missing one) is replaced by
 * `'[REDACTED]'`, and `anonymized` records whether that replacement happened.
 *
 * @param {object} user - Raw user record.
 * @returns {{role: *, username: string, anonymized: boolean}|null}
 *   Anonymized user, or null for falsy input.
 */
function sanitizeUser(user) {
  if (!user) {
    return null;
  }

  const isAdmin = user.username === 'admin';

  return {
    role: user.role,
    username: isAdmin ? 'admin' : '[REDACTED]',
    anonymized: !isAdmin
  };
}
/**
 * Scrub secrets from a decision's reasoning text.
 *
 * Delegates to `sanitizeViolationMessage`, so reasoning is redacted by the
 * same rules as violation messages.
 *
 * @param {string} reasoning - Raw reasoning text.
 * @returns {string|null} Scrubbed text, or null for falsy input.
 */
function sanitizeReasoning(reasoning) {
  return reasoning ? sanitizeViolationMessage(reasoning) : null;
}
/**
 * Sanitize a list of audit logs, dropping entries that failed.
 *
 * Each log goes through `sanitizeAuditLog`; entries for which it returned
 * null are left out of the result. An info line reporting how many of the
 * input logs made it through is written to the logger.
 *
 * @param {Array<object>} logs - Raw audit log records.
 * @returns {Array<object>} Successfully sanitized logs, in input order.
 */
function sanitizeBatch(logs) {
  const kept = [];

  logs.forEach((entry) => {
    const cleaned = sanitizeAuditLog(entry);
    // null marks a failed sanitization; those entries are not exported
    if (cleaned !== null) {
      kept.push(cleaned);
    }
  });

  logger.info(`Sanitized ${kept.length}/${logs.length} audit logs`);
  return kept;
}
// Exported sanitizers. sanitizeViolationMessage and sanitizeReasoning are
// defined in this file but are not part of the export list.
module.exports = {
sanitizeAuditLog,
sanitizeFilePath,
sanitizeViolation,
sanitizeContext,
sanitizeUser,
sanitizeBatch
};