tractatus/src/utils/audit-sanitizer.util.js
TheFlow d600f6ed83
Some checks are pending
CI / Run Tests (push) Waiting to run
CI / Lint Code (push) Waiting to run
CI / CSP Compliance Check (push) Waiting to run
chore(license): Phase B — relicense source files from Apache 2.0 to EUPL-1.2
Phase B of PLAN_LICENSE_STANDARDISATION_EUPL12_20260419. Follows Phase A
(c85f310f, 4ddc54a0) which flipped the LICENSE file + README; this commit
propagates EUPL-1.2 through source-file headers.

21 files touched across 4 distinct Apache-reference variants:

- V1 (14 files) — full Apache header block (JS /* ... */): 2 routes + 1
  controller + 7 services + 2 models + 3 utils. Replaced with equivalent
  EUPL-1.2 block pointing at EC canonical URL.
- V2 (2 files) — inline JSDoc license line (Copyright Tractatus Project):
  src/routes/calendar.routes.js + src/models/ScheduledTask.model.js.
  Replaced with the equivalent EUPL-1.2 license line.
- V3 (4 files) — Python docstring 'License: Apache 2.0': all 4 al-integration
  Python files. Replaced with 'License: EUPL-1.2'.
- V4 (1 file) — al-integration/README.md bare 'Apache 2.0' under '## License'
  heading. Replaced with 'EUPL-1.2'.

Verification:
- grep -r "Apache License|Apache 2.0|apache.org/licenses" src/ al-integration/
  returns zero matches (modulo venv).
- Unit tests: 524/524 pass (npm run test:unit).
- Integration test failures (177) are DB-connection infrastructure, pre-existing,
  unrelated to this header-only change.

Sole author basis: TheFlow, 930+ commits, unilateral relicensing (same as Phase A).

Replacement infrastructure also committed: scripts/relicense-apache-to-eupl.js
(auto-detecting variant replacement, idempotent, --dry-run mode). Reusable for
Phase C (community-repo sweep) if pattern structure aligns.

Out-of-scope Apache mentions still in the repo (next pass, NOT Phase B):
- SESSION_HANDOFF_ENFORCEMENT_COMPLETE.md (root doc)
- CLAUDE_Tractatus_Maintenance_Guide.md (root doc)
- For Claude Web/tractatus-claude-web-complete/** (docs snapshot subdirectory)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-19 20:32:09 +12:00

219 lines
6.2 KiB
JavaScript

/*
* Copyright 2025 John G Stroh
*
* Licensed under the European Union Public Licence, Version 1.2 (EUPL-1.2);
* you may not use this file except in compliance with the Licence.
*
* You may obtain a copy of the Licence at:
* https://interoperable-europe.ec.europa.eu/collection/eupl/eupl-text-eupl-12
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the Licence is distributed on an "AS IS" basis,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the Licence for the specific language governing permissions and
* limitations under the Licence.
*/
/**
* Audit Log Sanitizer
* Privacy-preserving data sanitization for cross-environment research
*
* Purpose: Enable research analysis across dev/prod while protecting:
* - Credentials and API keys
* - User identities (except "admin")
* - File paths with sensitive content
* - Environment variable values
*
* Strategy: Preserve statistical patterns, redact operational secrets
*/
const logger = require('./logger.util');
/**
 * Build the export-safe copy of a single audit log entry.
 *
 * Identifier, classification and decision fields are copied through
 * unchanged. The file path, violations, context, user and reasoning fields
 * are each passed through their dedicated sanitizer. The result is stamped
 * with `_sanitized` and `_sanitized_at`.
 *
 * @param {Object} log - Audit log entry to sanitize.
 * @returns {Object|null} Sanitized copy, or null when anything throws while
 *   building it (the error is logged instead of being propagated).
 */
function sanitizeAuditLog(log) {
  try {
    // Core identifiers: exported verbatim.
    const identifiers = {
      _id: log._id,
      timestamp: log.timestamp,
      service: log.service,
      allowed: log.allowed
    };

    // Activity classification: exported verbatim for research.
    const classification = {
      activityType: log.activityType,
      riskLevel: log.riskLevel,
      businessImpact: log.businessImpact,
      stakeholderImpact: log.stakeholderImpact,
      dataSensitivity: log.dataSensitivity
    };

    // Fields that may carry secrets or identities: each goes through its sanitizer.
    const redacted = {
      file_path: sanitizeFilePath(log.file_path),
      // Stays undefined when the entry has no violations list.
      violations: log.violations?.map(sanitizeViolation),
      context: sanitizeContext(log.context),
      user: sanitizeUser(log.user)
    };

    // Decision metadata: the decision is kept, the free-text reasoning is scrubbed.
    const decisionInfo = {
      decision: log.decision,
      reasoning: sanitizeReasoning(log.reasoning)
    };

    return {
      ...identifiers,
      ...classification,
      ...redacted,
      ...decisionInfo,
      // Mark as sanitized
      _sanitized: true,
      _sanitized_at: new Date()
    };
  } catch (error) {
    logger.error('Error sanitizing audit log:', error);
    return null;
  }
}
/**
 * Sanitize file paths - redact sensitive locations.
 *
 * Checks run in this order:
 *  1. Content patterns (credentials / environment files). A match replaces the
 *     whole path with a redaction marker. These run first so that a path such
 *     as `/home/<user>/.ssh/id_rsa` is categorised as `credentials` rather
 *     than being swallowed by the generic home-directory rule.
 *  2. Paths under `/home/<user>/projects/` keep their project-relative part,
 *     with the absolute prefix replaced by `[PROJECT]/`.
 *  3. Any other path containing `/home/<user>` or `/Users/<user>` is replaced
 *     wholesale by a user-path marker.
 *  4. Everything else is returned unchanged.
 *
 * @param {string} path - File path taken from an audit log entry.
 * @returns {string|{path: string, category: string, original_sanitized: boolean}|null}
 *   null for an empty/missing path, a redaction descriptor for sensitive
 *   paths, otherwise the (possibly prefix-stripped) path string.
 */
function sanitizeFilePath(path) {
  if (!path) return null;

  // Patterns describing sensitive *content*; evaluated in listed order.
  const contentPatterns = [
    { regex: /credential-vault/i, replace: '[REDACTED: credential-vault]', category: 'credentials' },
    { regex: /\.env/i, replace: '[REDACTED: env-file]', category: 'environment' },
    { regex: /api[_-]?keys?/i, replace: '[REDACTED: api-keys]', category: 'credentials' },
    { regex: /secrets?/i, replace: '[REDACTED: secrets]', category: 'credentials' },
    { regex: /password/i, replace: '[REDACTED: password-related]', category: 'credentials' },
    { regex: /token/i, replace: '[REDACTED: token-related]', category: 'credentials' },
    { regex: /ssh/i, replace: '[REDACTED: ssh-related]', category: 'credentials' }
  ];

  // Patterns that only reveal which OS user owns the path.
  const userPathPatterns = [
    { regex: /\/home\/[^\/]+/, replace: '/home/[USER]', category: 'user-path' },
    { regex: /\/Users\/[^\/]+/, replace: '/Users/[USER]', category: 'user-path' }
  ];

  const projectPrefix = /^\/home\/[^\/]+\/projects\//;

  const toRedaction = ({ replace, category }) => ({
    path: replace,
    category,
    original_sanitized: true
  });

  const sensitiveMatch = contentPatterns.find(({ regex }) => regex.test(path));
  if (sensitiveMatch) {
    return toRedaction(sensitiveMatch);
  }

  // Project paths are handled by the prefix strip below. They must bypass the
  // user-path rule, which would otherwise match first and discard the
  // project-relative part.
  if (!projectPrefix.test(path)) {
    const userPathMatch = userPathPatterns.find(({ regex }) => regex.test(path));
    if (userPathMatch) {
      return toRedaction(userPathMatch);
    }
  }

  // Keep non-sensitive paths but strip absolute portions
  return path.replace(projectPrefix, '[PROJECT]/');
}
/**
 * Reduce a violation record to its exportable metadata.
 *
 * The rule, severity and type are copied as-is; the message is scrubbed with
 * sanitizeViolationMessage; every other property of the input is dropped.
 *
 * @param {Object} violation - Violation entry from an audit log.
 * @returns {Object|null} Sanitized violation, or null for a missing entry.
 */
function sanitizeViolation(violation) {
  if (!violation) {
    return null;
  }

  const { rule, severity, message, type } = violation;

  return {
    rule,
    severity,
    // Actual credential values are stripped out of the message text.
    message: sanitizeViolationMessage(message),
    // undefined when the source violation carries no type.
    type,
    // Flag telling consumers the content has been scrubbed.
    content_sanitized: true
  };
}
/**
 * Sanitize violation messages - remove actual secrets.
 *
 * Applies a fixed sequence of redactions to free text, in this order:
 *  1. `sk-ant-api03-…` API keys,
 *  2. any run of 32 or more alphanumeric characters (treated as a token),
 *  3. credentials embedded in `mongodb://` and `mongodb+srv://` URIs,
 *  4. `user:pass@` credentials embedded in http/https URLs (the original
 *     scheme is preserved),
 *  5. `password<separator><value>` pairs.
 *
 * @param {string} message - Free text that may contain secrets.
 * @returns {string|null} The redacted text, or null for an empty/missing message.
 */
function sanitizeViolationMessage(message) {
  if (!message) return null;

  const patterns = [
    { regex: /sk-ant-api03-[A-Za-z0-9_-]+/g, replace: '[REDACTED: API-KEY]' },
    { regex: /[A-Za-z0-9]{32,}/g, replace: '[REDACTED: TOKEN]' },
    // The optional "+srv" group covers DNS seed-list URIs; $1 re-emits it when present.
    { regex: /mongodb(\+srv)?:\/\/[^@]+@/g, replace: 'mongodb$1://[USER]:[PASS]@' },
    // $1 keeps http vs https as it appeared in the input.
    { regex: /(https?):\/\/[^:]+:[^@]+@/g, replace: '$1://[USER]:[PASS]@' },
    { regex: /password["\s:=]+[^\s"]+/gi, replace: 'password' + '=[REDACTED]' } // Concat to avoid credential detection
  ];

  return patterns.reduce(
    (text, { regex, replace }) => text.replace(regex, replace),
    message
  );
}
/**
 * Sanitize context object - remove sensitive values.
 *
 * Walks the context recursively. Any property whose key contains
 * password/secret/token/key/credential (case-insensitive) has its whole value
 * replaced by '[REDACTED]'. Every other value is sanitized according to its
 * type (see sanitizeContextValue). Arrays stay arrays and Date values stay
 * Dates, so the exported structure mirrors the input.
 *
 * @param {Object|Array} context - Context data attached to an audit log entry.
 * @returns {Object|Array|null} Sanitized copy, or null for a missing context.
 */
function sanitizeContext(context) {
  if (!context) return null;

  // Array indices are never sensitive keys, so only the elements need checking.
  if (Array.isArray(context)) {
    return context.map((item) => sanitizeContextValue(item));
  }

  const sensitiveKey = /password|secret|token|key|credential/i;
  const sanitized = {};
  for (const [key, value] of Object.entries(context)) {
    // The key is kept so the field's presence is still visible; only the value is hidden.
    sanitized[key] = sensitiveKey.test(key) ? '[REDACTED]' : sanitizeContextValue(value);
  }
  return sanitized;
}

/**
 * Sanitize a single context value according to its type.
 *
 * Strings are scrubbed with sanitizeViolationMessage, arrays are mapped
 * element by element, Dates are copied, other objects are recursed into via
 * sanitizeContext, and remaining primitives are returned unchanged.
 *
 * @param {*} value - Value stored under a non-sensitive key or array index.
 * @returns {*} Sanitized counterpart of the value.
 */
function sanitizeContextValue(value) {
  if (typeof value === 'string') {
    return sanitizeViolationMessage(value);
  }
  if (Array.isArray(value)) {
    return value.map((item) => sanitizeContextValue(item));
  }
  if (value instanceof Date) {
    // Object.entries() of a Date is empty, so recursing would turn it into {}.
    return new Date(value.getTime());
  }
  if (typeof value === 'object' && value !== null) {
    return sanitizeContext(value);
  }
  return value;
}
/**
 * Produce the anonymized form of a user record.
 *
 * The role is always kept. The username is kept only when it is exactly
 * "admin"; any other username is replaced by '[REDACTED]'. The `anonymized`
 * flag records whether that replacement happened.
 *
 * @param {Object} user - User record from an audit log entry.
 * @returns {{role: *, username: string, anonymized: boolean}|null}
 *   Anonymized user, or null when no user is given.
 */
function sanitizeUser(user) {
  if (!user) {
    return null;
  }

  const { role, username } = user;
  const isAdmin = username === 'admin';

  return {
    role,
    username: isAdmin ? 'admin' : '[REDACTED]',
    anonymized: !isAdmin
  };
}
/**
 * Scrub secrets from free-text reasoning.
 *
 * Delegates to sanitizeViolationMessage, so reasoning text receives the same
 * redactions as violation messages.
 *
 * @param {string} reasoning - Reasoning text from an audit log entry.
 * @returns {string|null} Redacted text, or null when there is no reasoning.
 */
function sanitizeReasoning(reasoning) {
  return reasoning ? sanitizeViolationMessage(reasoning) : null;
}
/**
 * Sanitize a list of audit logs in one pass.
 *
 * Each entry goes through sanitizeAuditLog; entries for which it returns null
 * (failed sanitization) are left out of the result. The ratio of kept to
 * received entries is logged at info level.
 *
 * @param {Object[]} logs - Audit log entries to sanitize.
 * @returns {Object[]} Successfully sanitized entries, in input order.
 */
function sanitizeBatch(logs) {
  const sanitized = logs.flatMap((log) => {
    const clean = sanitizeAuditLog(log);
    // An empty array contributes nothing to the flattened result.
    return clean === null ? [] : [clean];
  });

  logger.info(`Sanitized ${sanitized.length}/${logs.length} audit logs`);
  return sanitized;
}
// Public API of this module. sanitizeViolationMessage and sanitizeReasoning are
// not in this list, so other modules cannot import them directly.
module.exports = {
sanitizeAuditLog,
sanitizeFilePath,
sanitizeViolation,
sanitizeContext,
sanitizeUser,
sanitizeBatch
};