tractatus/scripts/sync-prod-audit-logs.js
TheFlow d854ac85e2 feat(research): add cross-environment audit log sync infrastructure
Implements privacy-preserving synchronization of production audit logs
to development for comprehensive governance research analysis.

Backend Components:
- SyncMetadata.model.js: Track sync state and statistics
- audit-sanitizer.util.js: Privacy sanitization utility
  - Redacts credentials, API keys, user identities
  - Sanitizes file paths and violation content
  - Preserves statistical patterns for research
- sync-prod-audit-logs.js: CLI sync script
  - Incremental sync with deduplication
  - Dry-run mode for testing
  - Configurable date range
- AuditLog.model.js: Enhanced schema with environment tracking
  - environment field (development/production/staging)
  - sync_metadata tracking (original_id, synced_from, etc.)
  - New indexes for cross-environment queries
- audit.controller.js: New /api/admin/audit-export endpoint
  - Privacy-sanitized export for cross-environment sync
  - Environment filter support in getAuditLogs
- MemoryProxy.service.js: Environment tagging in auditDecision()
  - Tags new logs with NODE_ENV or override
  - Sets is_local flag for tracking

Frontend Components:
- audit-analytics.html: Environment filter dropdown
- audit-analytics.js: Environment filter query parameter handling

Research Benefits:
- Combine dev and prod governance statistics
- Longitudinal analysis across environments
- Validate framework consistency
- Privacy-preserving data sharing

Security:
- API-based export (not direct DB access)
- Admin-only endpoints with JWT authentication
- Comprehensive credential redaction
- One-way sync (production → development)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-27 12:11:16 +13:00

249 lines
7.4 KiB
JavaScript
Executable file

#!/usr/bin/env node
/*
* Copyright 2025 John G Stroh
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Sync Production Audit Logs to Development
* Privacy-preserving cross-environment research data synchronization
*
* Usage:
* node scripts/sync-prod-audit-logs.js [--since=YYYY-MM-DD] [--dry-run]
*
* Purpose:
* - Combine dev and prod governance statistics for comprehensive analysis
* - Preserve research value while protecting operational secrets
* - Enable comparative analysis (dev vs prod environments)
*
* Privacy:
* - Production data is sanitized before import
* - Credentials, API keys, and user identities redacted
* - File paths generalized
* - Violation content stripped
*/
require('dotenv').config();
const mongoose = require('mongoose');
const fetch = require('node-fetch');
const AuditLog = require('../src/models/AuditLog.model');
const SyncMetadata = require('../src/models/SyncMetadata.model');
// Base URL of the production API. Trailing slashes are stripped so that
// appending "/api/..." to it never produces a double slash in the request path.
const PROD_URL = (process.env.PROD_API_URL || 'https://agenticgovernance.digital').replace(/\/+$/, '');
// Admin JWT sent as the Bearer token on the export request.
const PROD_TOKEN = process.env.PROD_ADMIN_TOKEN;
// Fail fast when no token is configured. `--help` is exempt: it only prints
// usage and exits, so it has to work before a token has been set up.
if (!PROD_TOKEN && !process.argv.slice(2).includes('--help')) {
  console.error('❌ PROD_ADMIN_TOKEN not set in .env');
  console.error(' Generate a token in production and add to .env:');
  console.error(' PROD_ADMIN_TOKEN=your_production_admin_jwt_token');
  process.exit(1);
}
/**
 * Main sync function: fetches audit logs from the production export API and
 * imports them into the dev MongoDB database.
 *
 * Flow:
 *   1. Connect to the dev database and load (or create) the `prod_audit`
 *      SyncMetadata record that holds the last sync checkpoint.
 *   2. GET `${PROD_URL}/api/admin/audit-export?since=...` using the admin token.
 *   3. Import every returned log that is not already present, matching on
 *      `sync_metadata.original_id`, and tag it as coming from production.
 *   4. Persist updated sync statistics (skipped entirely in dry-run mode).
 *
 * The sync checkpoint is only advanced when every log in the batch imported
 * without error. After a partial failure the next run re-fetches the same
 * window; already-imported logs are skipped by the duplicate check, so only
 * the failed ones are retried.
 *
 * This function does not sanitize anything itself; it only records the
 * `_sanitized` flag supplied by the export endpoint.
 *
 * On any fatal error the error is logged and the process exits with code 1,
 * so the returned promise does not reject for those failures.
 *
 * @param {Object} [options]
 * @param {boolean} [options.dryRun=false] - Report what would be imported
 *   without writing logs or sync metadata.
 * @param {?string} [options.since=null] - Date string overriding the stored
 *   checkpoint as the lower bound of the export window.
 * @returns {Promise<{synced: number, skipped: number, errors: number}>}
 *   Counts for this run (`synced` includes would-be imports in dry-run mode).
 */
async function syncProductionAuditLogs(options = {}) {
  const { dryRun = false, since = null } = options;
  const startTime = Date.now();
  let failed = false;

  try {
    // Reject an unparseable override up front with a clear message, instead of
    // letting toISOString() throw "Invalid time value" later on.
    const sinceOverride = since ? new Date(since) : null;
    if (sinceOverride && Number.isNaN(sinceOverride.getTime())) {
      throw new Error(`Invalid "since" value: ${since} (expected a date such as YYYY-MM-DD)`);
    }

    // Connect to dev MongoDB
    await mongoose.connect(process.env.MONGODB_URI || 'mongodb://localhost:27017/tractatus_dev');
    console.log('✓ Connected to dev MongoDB');

    // Get last sync metadata
    let syncMeta = await SyncMetadata.findOne({ type: 'prod_audit' });
    if (!syncMeta) {
      // First sync - use provided date or default to 30 days ago
      const defaultSince = new Date();
      defaultSince.setDate(defaultSince.getDate() - 30);
      syncMeta = new SyncMetadata({
        type: 'prod_audit',
        source_environment: 'production',
        last_sync_time: sinceOverride || defaultSince
      });
      console.log('📅 First sync - starting from:', syncMeta.last_sync_time.toISOString());
    } else {
      console.log('📅 Last sync:', syncMeta.last_sync_time.toISOString());
    }
    const sinceDate = sinceOverride || syncMeta.last_sync_time;

    // Fetch from production
    console.log('\n🌐 Fetching audit logs from production...');
    const query = new URLSearchParams({ since: sinceDate.toISOString() });
    const url = `${PROD_URL}/api/admin/audit-export?${query}`;
    const response = await fetch(url, {
      headers: {
        'Authorization': `Bearer ${PROD_TOKEN}`,
        'Content-Type': 'application/json'
      }
    });
    if (!response.ok) {
      throw new Error(`Production API error: ${response.status} ${response.statusText}`);
    }
    const data = await response.json();
    if (!data.success) {
      throw new Error(`Production export failed: ${data.error}`);
    }
    console.log(`✓ Received ${data.count} audit logs from production`);
    console.log(` Exported at: ${data.exported_at}`);

    if (data.count === 0) {
      console.log('\n✓ No new logs to sync');
      // Same result shape as the normal path so callers can always read `errors`.
      return { synced: 0, skipped: 0, errors: 0 };
    }
    if (!Array.isArray(data.logs)) {
      throw new Error('Production export response is missing the "logs" array');
    }

    // Import logs to dev
    console.log('\n📥 Importing to dev environment...');
    let imported = 0;
    let skipped = 0;
    let errors = 0;
    for (const log of data.logs) {
      try {
        // Check if already exists (by _id from production)
        const exists = await AuditLog.findOne({
          'sync_metadata.original_id': log._id
        });
        if (exists) {
          skipped++;
          continue;
        }
        if (dryRun) {
          console.log(` [DRY RUN] Would import: ${log.service} - ${log.timestamp}`);
          imported++;
          continue;
        }

        // Drop the production _id so the dev database assigns its own; the
        // original value is kept in sync_metadata for deduplication.
        const { _id: originalId, ...logFields } = log;
        const devLog = {
          ...logFields,
          // Environment metadata
          environment: 'production',
          synced_at: new Date(),
          is_local: false,
          // Sync tracking
          sync_metadata: {
            original_id: originalId,
            synced_from: 'production',
            sync_batch: data.exported_at,
            sanitized: log._sanitized || false
          }
        };
        await AuditLog.create(devLog);
        imported++;
      } catch (error) {
        // One bad record must not abort the batch; count it and keep going.
        console.error(` ✗ Error importing log ${log._id}:`, error.message);
        errors++;
      }
    }

    // Update sync metadata
    if (!dryRun) {
      const exportedAt = new Date(data.exported_at);
      if (errors > 0) {
        // Advancing past a failed log would exclude it from every later
        // incremental export, so keep the old checkpoint and retry next run.
        console.warn(' ⚠️ Some logs failed to import - sync checkpoint not advanced so they are retried next run');
      } else if (Number.isNaN(exportedAt.getTime())) {
        console.warn(' ⚠️ Export timestamp missing or invalid - sync checkpoint not advanced');
      } else {
        syncMeta.last_sync_time = exportedAt;
      }
      // NOTE(review): assumes the SyncMetadata schema initialises `stats`
      // with numeric defaults - confirm against SyncMetadata.model.js.
      syncMeta.stats.total_synced += imported;
      syncMeta.stats.last_batch_size = imported;
      syncMeta.stats.last_batch_duration_ms = Date.now() - startTime;
      syncMeta.stats.errors_count += errors;
      syncMeta.last_result = {
        success: errors === 0,
        synced_count: imported,
        timestamp: new Date()
      };
      await syncMeta.save();
    }

    const duration = ((Date.now() - startTime) / 1000).toFixed(2);
    console.log('\n' + '═'.repeat(60));
    console.log(' SYNC SUMMARY');
    console.log('═'.repeat(60));
    console.log(` Imported: ${imported}`);
    console.log(` Skipped (duplicates): ${skipped}`);
    console.log(` Errors: ${errors}`);
    console.log(` Duration: ${duration}s`);
    if (dryRun) {
      console.log('\n ⚠️ DRY RUN - No data was actually imported');
    }
    console.log('═'.repeat(60));
    console.log('\n✓ Sync complete');
    return { synced: imported, skipped, errors };
  } catch (error) {
    failed = true;
    console.error('\n❌ Sync failed:', error.message);
    console.error(error.stack);
  } finally {
    // Single cleanup point for every exit path. A failure to disconnect is
    // reported but never masks the real outcome of the sync.
    try {
      await mongoose.disconnect();
    } catch (disconnectError) {
      console.error(' ⚠️ Failed to close MongoDB connection:', disconnectError.message);
    }
    if (failed) {
      process.exit(1);
    }
  }
}
// Parse command line arguments into the options object passed to the sync.
// Unknown arguments abort the run: silently ignoring a typo such as
// "--dryrun" would perform a real import when a preview was intended.
const args = process.argv.slice(2);
const options = {};
for (const arg of args) {
  if (arg === '--dry-run') {
    options.dryRun = true;
  } else if (arg.startsWith('--since=')) {
    // Take everything after the first "=" rather than splitting on every "=".
    options.since = arg.slice('--since='.length);
  } else if (arg === '--help') {
    console.log(`
Usage: node scripts/sync-prod-audit-logs.js [options]
Options:
--since=YYYY-MM-DD Sync logs from specific date (default: last sync time)
--dry-run Preview what would be synced without importing
--help Show this help message
Environment Variables:
PROD_API_URL Production API base URL (default: https://agenticgovernance.digital)
PROD_ADMIN_TOKEN Production admin JWT token (required)
Examples:
# Sync new logs since last sync
node scripts/sync-prod-audit-logs.js
# Sync logs from specific date
node scripts/sync-prod-audit-logs.js --since=2025-10-01
# Preview sync without importing
node scripts/sync-prod-audit-logs.js --dry-run
`);
    process.exit(0);
  } else {
    console.error(`❌ Unknown argument: ${arg}`);
    console.error(' Run with --help to see the supported options.');
    process.exit(1);
  }
}
// Run sync
console.log('🔄 Starting production audit log sync...\n');
// The sync function reports its own failures; this handler is a last resort
// so an unexpected rejection still produces a non-zero exit code.
syncProductionAuditLogs(options).catch((error) => {
  console.error('\n❌ Sync failed:', error.message);
  process.exit(1);
});