#!/usr/bin/env node
/**
 * Export Anonymized Audit Data for Hugging Face Space
 *
 * Exports audit decisions from MongoDB in a safe, anonymized format
 * for the Tractatus Audit Log Viewer on Hugging Face Spaces.
 *
 * Usage: node scripts/export-hf-audit-data.js [--limit=1000]
 *
 * Exit status is 0 on success and 1 when the export fails or the
 * --limit value is malformed.
 */

const { MongoClient } = require('mongodb');
const fs = require('fs');
const path = require('path');

// MongoDB connection
const MONGO_URI = process.env.MONGO_URI || 'mongodb://localhost:27017';
const DB_NAME = 'tractatus_dev';

// Prefix of the only command line flag this script understands.
const LIMIT_FLAG = '--limit=';

// Maximum number of characters of a boundary label written to the export.
const MAX_BOUNDARY_LENGTH = 100;

/**
 * Read the optional `--limit=N` flag from the command line arguments.
 *
 * A malformed value (non-numeric, negative, fractional, or too large to be a
 * safe integer) prints an error and terminates the process with status 1
 * instead of being silently ignored, so a typo can never turn a limited
 * export into a full one. Nothing has been opened at this point, so exiting
 * immediately is safe.
 *
 * @param {string[]} argv - Command line arguments without `node` and the script path.
 * @returns {number|null} The positive limit, or `null` when no limit applies
 *   (flag absent, or `--limit=0`).
 */
function parseLimit(argv) {
  const flag = argv.find((arg) => arg.startsWith(LIMIT_FLAG));
  if (flag === undefined) {
    return null;
  }

  const raw = flag.slice(LIMIT_FLAG.length);
  const value = /^\d+$/.test(raw) ? Number.parseInt(raw, 10) : Number.NaN;
  if (!Number.isSafeInteger(value)) {
    console.error(`❌ Invalid --limit value "${raw}": expected a non-negative integer`);
    console.error('Usage: node scripts/export-hf-audit-data.js [--limit=1000]');
    process.exit(1);
  }

  // The $limit stage only accepts positive integers; 0 means "export everything".
  return value > 0 ? value : null;
}

// Parse command line args
const args = process.argv.slice(2);
const LIMIT = parseLimit(args);

/**
 * Build the aggregation pipeline that shapes raw audit log documents.
 *
 * The projection derives `service` from `action`, maps the boolean `allowed`
 * field to 'allow' / 'deny', and returns the documents newest first.
 *
 * @param {number|null} limit - Maximum number of documents, or `null` for all.
 * @returns {object[]} Aggregation pipeline stages.
 */
function buildPipeline(limit) {
  const pipeline = [
    {
      $project: {
        _id: 0,
        timestamp: '$timestamp',
        action: '$action',
        service: {
          $cond: {
            if: { $eq: ['$action', 'context_pressure_analysis'] },
            then: 'ContextPressureMonitor',
            else: {
              $cond: {
                if: { $regexMatch: { input: '$action', regex: /boundary/ } },
                then: 'BoundaryEnforcer',
                else: '$action'
              }
            }
          }
        },
        decision: {
          $cond: { if: '$allowed', then: 'allow', else: 'deny' }
        },
        boundary: '$boundary',
        boundary_domain: '$domain',
        context_pressure: '$metadata.pressure_level',
        // The whole metadata sub-document is fetched, but anonymizeDecision()
        // copies only `services_involved` from it into the exported record.
        metadata: '$metadata'
      }
    },
    { $sort: { timestamp: -1 } }
  ];

  if (limit) {
    pipeline.push({ $limit: limit });
  }

  return pipeline;
}

/**
 * Reduce one projected audit document to the fields that are published.
 *
 * @param {object} decision - Document produced by the aggregation pipeline.
 * @returns {object} Record with timestamp, action, service, decision,
 *   boundary, boundary_domain, context_pressure and coordination.
 */
function anonymizeDecision(decision) {
  let boundary = decision.boundary || 'N/A';
  if (typeof boundary === 'string' && boundary.includes(':')) {
    // Format: "12.2: Innovation cannot be proceduralized"
    // Only the first two colon-separated segments are kept; anything after a
    // second colon is dropped, and the result is capped in length.
    const [label, text] = boundary.split(':');
    boundary = `${label.trim()}: ${text.trim()}`.slice(0, MAX_BOUNDARY_LENGTH);
  }

  return {
    timestamp: decision.timestamp,
    action: decision.action,
    service: decision.service || 'Unknown',
    decision: decision.decision || 'unknown',
    boundary,
    boundary_domain: decision.boundary_domain || null,
    context_pressure: decision.context_pressure || 'NORMAL',
    // Services that took part in the decision, when the metadata records them.
    coordination: decision.metadata?.services_involved || []
  };
}

/**
 * Count how many items share each key.
 *
 * A Map is used rather than a plain object so that data-derived keys such as
 * "constructor" cannot collide with properties inherited from Object.prototype.
 *
 * @param {object[]} items - Records to tally.
 * @param {function(object): *} keyOf - Selects the value to group by; it is
 *   converted to a string before counting.
 * @returns {Map<string, number>} Occurrences per key, in first-seen order.
 */
function countBy(items, keyOf) {
  const counts = new Map();
  for (const item of items) {
    const key = String(keyOf(item));
    counts.set(key, (counts.get(key) ?? 0) + 1);
  }
  return counts;
}

/**
 * Print one titled section of the export statistics.
 *
 * @param {string} title - Heading line, printed as given.
 * @param {Map<string, number>} counts - Tally to print.
 * @param {boolean} [byCountDesc=false] - Order rows from most to least frequent.
 */
function printCounts(title, counts, byCountDesc = false) {
  const rows = [...counts];
  if (byCountDesc) {
    rows.sort((a, b) => b[1] - a[1]);
  }

  console.log(title);
  for (const [label, count] of rows) {
    console.log(` ${label}: ${count}`);
  }
}

/**
 * Export audit decisions from MongoDB to
 * hf-spaces/audit-log-viewer/audit-decisions.json (relative to the parent of
 * this script's directory), printing progress and summary statistics.
 *
 * Failures inside the export are logged and reported through
 * `process.exitCode` rather than `process.exit()`, because exiting from the
 * `catch` block would terminate the process before `finally` could close
 * the MongoDB client.
 *
 * @returns {Promise<void>} Resolves once the client has been closed.
 */
async function exportAuditData() {
  console.log('🔍 Tractatus Audit Data Export for Hugging Face');
  console.log('================================================\n');

  const client = new MongoClient(MONGO_URI);

  try {
    await client.connect();
    console.log('✓ Connected to MongoDB');

    const db = client.db(DB_NAME);
    const auditCollection = db.collection('auditLogs');

    // Count total decisions
    const totalCount = await auditCollection.countDocuments();
    console.log(`✓ Found ${totalCount} total audit logs`);

    if (LIMIT) {
      console.log(` → Limiting export to ${LIMIT} decisions`);
    }

    console.log('\n📊 Exporting audit decisions...');
    const decisions = await auditCollection.aggregate(buildPipeline(LIMIT)).toArray();
    console.log(`✓ Exported ${decisions.length} decisions`);

    // Anonymize and clean data
    const anonymized = decisions.map(anonymizeDecision);

    // Statistics
    console.log('\n📈 Export Statistics:');
    console.log(` Total decisions: ${anonymized.length}`);
    printCounts('\n By Service:', countBy(anonymized, (d) => d.service), true);
    printCounts('\n By Decision:', countBy(anonymized, (d) => d.decision));
    printCounts('\n By Pressure Level:', countBy(anonymized, (d) => d.context_pressure));

    // Write to JSON file
    const outputPath = path.join(__dirname, '..', 'hf-spaces', 'audit-log-viewer', 'audit-decisions.json');

    // Ensure directory exists
    const outputDir = path.dirname(outputPath);
    if (!fs.existsSync(outputDir)) {
      fs.mkdirSync(outputDir, { recursive: true });
      console.log(`\n✓ Created output directory: ${outputDir}`);
    }

    fs.writeFileSync(outputPath, JSON.stringify(anonymized, null, 2));

    const fileSize = (fs.statSync(outputPath).size / 1024 / 1024).toFixed(2);
    console.log(`\n✓ Exported to: ${outputPath}`);
    console.log(` File size: ${fileSize} MB`);

    console.log('\n✅ Export complete!');
    console.log('\nNext steps:');
    console.log(' 1. Review audit-decisions.json for any sensitive data');
    console.log(' 2. Copy to HF Space repository');
    console.log(' 3. Deploy Gradio app');
  } catch (error) {
    console.error('❌ Export failed:', error);
    process.exitCode = 1;
  } finally {
    await client.close();
  }
}

// Run export. The handler covers errors raised outside the function's own
// try block: constructing the client, or closing it in `finally`.
exportAuditData().catch((error) => {
  console.error('❌ Export failed:', error);
  process.exitCode = 1;
});