// tractatus/src/utils/audit-sanitizer.util.js

/*
 * Copyright 2025 John G Stroh
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Audit Log Sanitizer
 * Privacy-preserving data sanitization for cross-environment research
 *
 * Purpose: Enable research analysis across dev/prod while protecting:
 * - Credentials and API keys
 * - User identities (except "admin")
 * - File paths with sensitive content
 * - Environment variable values
 *
 * Strategy: Preserve statistical patterns, redact operational secrets
 */

const logger = require('./logger.util');

/**
 * Sanitize complete audit log for export
 *
 * Builds a new object from `log`: identifier and classification fields are
 * copied unchanged, while file path, violations, context, user and reasoning
 * go through their dedicated sanitizers. Fields not listed here are not
 * carried over to the result.
 *
 * @param {Object} log - Raw audit log entry.
 * @returns {Object|null} Sanitized copy flagged with `_sanitized` and
 *   `_sanitized_at`, or null when sanitizing threw (the error is logged).
 */
function sanitizeAuditLog(log) {
  // Copied verbatim: core identifiers first, then activity classification.
  const passthroughFields = [
    '_id',
    'timestamp',
    'service',
    'allowed',
    'activityType',
    'riskLevel',
    'businessImpact',
    'stakeholderImpact',
    'dataSensitivity'
  ];

  try {
    const sanitized = {};

    for (const field of passthroughFields) {
      sanitized[field] = log[field];
    }

    // Fields that need redaction before leaving the environment
    sanitized.file_path = sanitizeFilePath(log.file_path);
    sanitized.violations = log.violations?.map(sanitizeViolation);
    sanitized.context = sanitizeContext(log.context);
    sanitized.user = sanitizeUser(log.user);

    // Decision metadata: the decision is kept, its reasoning text is scrubbed
    sanitized.decision = log.decision;
    sanitized.reasoning = sanitizeReasoning(log.reasoning);

    // Provenance markers
    sanitized._sanitized = true;
    sanitized._sanitized_at = new Date();

    return sanitized;
  } catch (error) {
    logger.error('Error sanitizing audit log:', error);
    return null;
  }
}

/**
 * Sanitize file paths - redact sensitive locations
 *
 * Resolution order (first match wins):
 *   1. Credential / environment patterns: the whole path is replaced by a
 *      `[REDACTED: ...]` label and returned as
 *      `{ path, category, original_sanitized: true }`.
 *   2. Paths under `/home/<user>/projects/`: returned as a string with that
 *      prefix replaced by `[PROJECT]/`, so the project-relative part is kept.
 *   3. Any other `/home/<user>` or `/Users/<user>` path: collapsed to
 *      `/home/[USER]` or `/Users/[USER]` with category `user-path`.
 *   4. Everything else is returned unchanged.
 *
 * Credential patterns are checked before the home-directory patterns so that
 * e.g. `/home/<user>/.ssh/...` is reported as `credentials` rather than being
 * shadowed by the generic `user-path` match, and so that the `[PROJECT]/`
 * rewrite is actually reachable for non-sensitive project files.
 *
 * @param {string} path - File path taken from an audit log entry. Non-string
 *   truthy values are converted with `String()` before matching.
 * @returns {null|string|{path: string, category: string, original_sanitized: boolean}}
 *   null for empty input, an object for redacted paths, otherwise a string.
 */
function sanitizeFilePath(path) {
  if (!path) return null;

  const text = typeof path === 'string' ? path : String(path);

  const credentialPatterns = [
    { regex: /credential-vault/i, replace: '[REDACTED: credential-vault]', category: 'credentials' },
    { regex: /\.env/i, replace: '[REDACTED: env-file]', category: 'environment' },
    { regex: /api[_-]?keys?/i, replace: '[REDACTED: api-keys]', category: 'credentials' },
    { regex: /secrets?/i, replace: '[REDACTED: secrets]', category: 'credentials' },
    { regex: /password/i, replace: '[REDACTED: password-related]', category: 'credentials' },
    { regex: /token/i, replace: '[REDACTED: token-related]', category: 'credentials' },
    { regex: /ssh/i, replace: '[REDACTED: ssh-related]', category: 'credentials' }
  ];

  const userPathPatterns = [
    { regex: /\/home\/[^\/]+/, replace: '/home/[USER]', category: 'user-path' },
    { regex: /\/Users\/[^\/]+/, replace: '/Users/[USER]', category: 'user-path' }
  ];

  const projectPrefix = /^\/home\/[^\/]+\/projects\//;

  const toRedaction = ({ replace, category }) => ({
    path: replace,
    category,
    original_sanitized: true
  });

  const credentialHit = credentialPatterns.find(({ regex }) => regex.test(text));
  if (credentialHit) {
    return toRedaction(credentialHit);
  }

  // Keep non-sensitive project paths but strip the absolute, user-specific prefix
  if (projectPrefix.test(text)) {
    return text.replace(projectPrefix, '[PROJECT]/');
  }

  const userPathHit = userPathPatterns.find(({ regex }) => regex.test(text));
  if (userPathHit) {
    return toRedaction(userPathHit);
  }

  return text;
}

/**
 * Sanitize violation details - keep metadata, remove content
 *
 * @param {Object} violation - Violation record from an audit log.
 * @returns {Object|null} New object holding `rule`, `severity`, `type`, the
 *   scrubbed `message` and a `content_sanitized` marker; null for falsy input.
 */
function sanitizeViolation(violation) {
  if (!violation) {
    return null;
  }

  const { rule, severity, message, type } = violation;

  return {
    rule,
    severity,
    // The message may embed credential values, so it is scrubbed rather than copied
    message: sanitizeViolationMessage(message),
    type,
    content_sanitized: true
  };
}

/**
 * Sanitize violation messages - remove actual secrets
 *
 * Patterns are applied in order, most specific context first:
 *   1. `password<sep><value>` pairs
 *   2. credentials embedded in `mongodb://` / `mongodb+srv://` URIs
 *   3. `user:pass@` credentials in `http://` / `https://` URLs
 *   4. `sk-ant-api03-...` API keys
 *   5. any remaining run of 32+ alphanumeric characters
 *
 * The password pattern runs before the key/token patterns because its value
 * match stops at whitespace: if the value were first rewritten to a label
 * containing a space (e.g. `[REDACTED: TOKEN]`), only part of the label would
 * be consumed and the output would be left garbled.
 *
 * URL credential patterns do not cross whitespace, so ordinary prose that
 * mentions a URL and later an e-mail address is not swallowed, and the
 * original scheme is kept in the output.
 *
 * @param {*} message - Text to scrub. Non-string truthy values are converted
 *   with `String()` first.
 * @returns {string|null} Scrubbed text, or null for falsy input.
 */
function sanitizeViolationMessage(message) {
  if (!message) return null;

  const text = typeof message === 'string' ? message : String(message);

  const patterns = [
    { regex: /password["\s:=]+[^\s"]+/gi, replace: 'password' + '=[REDACTED]' }, // Concat to avoid credential detection
    { regex: /(mongodb(?:\+srv)?):\/\/[^\s@]+@/g, replace: '$1://[USER]:[PASS]@' },
    { regex: /(https?):\/\/[^\s:]+:[^\s@]+@/g, replace: '$1://[USER]:[PASS]@' },
    { regex: /sk-ant-api03-[A-Za-z0-9_-]+/g, replace: '[REDACTED: API-KEY]' },
    { regex: /[A-Za-z0-9]{32,}/g, replace: '[REDACTED: TOKEN]' }
  ];

  return patterns.reduce(
    (sanitized, { regex, replace }) => sanitized.replace(regex, replace),
    text
  );
}

/**
 * Sanitize context object - remove sensitive values
 *
 * Walks `context` recursively and returns a sanitized copy:
 *   - values under keys matching password/secret/token/key/credential are
 *     replaced by `'[REDACTED]'` regardless of their type
 *   - string values are scrubbed with `sanitizeViolationMessage`
 *   - arrays stay arrays, with each element sanitized by the same rules
 *   - `Date` values are kept as-is (enumerating a Date's own properties yields
 *     nothing, so recursing into it would lose the timestamp)
 *   - other objects are recursed into; remaining primitives are copied
 *
 * @param {Object|Array} context - Context data from an audit log entry.
 * @returns {Object|Array|null} Sanitized copy, or null for falsy input.
 */
function sanitizeContext(context) {
  if (!context) return null;

  if (Array.isArray(context)) {
    return context.map((item) => sanitizeContextValue(item));
  }

  const sanitized = {};

  for (const [key, value] of Object.entries(context)) {
    // Sensitive keys are redacted wholesale, whatever they hold
    sanitized[key] = /password|secret|token|key|credential/i.test(key)
      ? '[REDACTED]'
      : sanitizeContextValue(value);
  }

  return sanitized;
}

/**
 * Sanitize a single context value according to its type (helper for
 * sanitizeContext; not exported).
 */
function sanitizeContextValue(value) {
  if (typeof value === 'string') {
    return sanitizeViolationMessage(value);
  }

  if (value === null || typeof value !== 'object') {
    return value;
  }

  if (value instanceof Date) {
    return value;
  }

  // Arrays and plain objects
  return sanitizeContext(value);
}

/**
 * Anonymize user information - keep role, redact username unless "admin"
 *
 * @param {Object} user - User record from an audit log entry.
 * @returns {Object|null} `{ role, username, anonymized }`, where `username`
 *   is either `'admin'` or `'[REDACTED]'` and `anonymized` tells whether the
 *   name was redacted; null for falsy input.
 */
function sanitizeUser(user) {
  if (!user) {
    return null;
  }

  const { role, username } = user;
  const isAdmin = username === 'admin';

  return {
    role,
    username: isAdmin ? 'admin' : '[REDACTED]',
    anonymized: !isAdmin
  };
}

/**
 * Sanitize reasoning text - remove specific file content references
 *
 * Delegates to `sanitizeViolationMessage`, so reasoning text is scrubbed with
 * the same patterns as violation messages.
 *
 * @param {string} reasoning - Free-text reasoning attached to a decision.
 * @returns {string|null} Scrubbed text, or null for falsy input.
 */
function sanitizeReasoning(reasoning) {
  return reasoning ? sanitizeViolationMessage(reasoning) : null;
}

/**
 * Batch sanitize multiple audit logs
 *
 * Entries for which `sanitizeAuditLog` returns null are left out of the
 * result. A summary line with the sanitized/total counts is written to the
 * info log.
 *
 * @param {Array<Object>} logs - Raw audit log entries.
 * @returns {Array<Object>} Successfully sanitized entries, in input order.
 */
function sanitizeBatch(logs) {
  const kept = [];

  logs.forEach((entry) => {
    const cleaned = sanitizeAuditLog(entry);
    // null marks a failed sanitization
    if (cleaned !== null) {
      kept.push(cleaned);
    }
  });

  logger.info(`Sanitized ${kept.length}/${logs.length} audit logs`);

  return kept;
}

// Public API of this module. sanitizeViolationMessage and sanitizeReasoning
// are not part of this export list; within this file they are only called by
// the functions exported below.
module.exports = {
  sanitizeAuditLog,
  sanitizeFilePath,
  sanitizeViolation,
  sanitizeContext,
  sanitizeUser,
  sanitizeBatch
};