tractatus/scripts/check-confidential-docs.js
TheFlow 7cd44118ee fix(deployment): handle YAML frontmatter confidential: false marker
Enhanced confidential document scanner to parse YAML frontmatter:
- Detects YAML frontmatter blocks (--- ... ---)
- Checks for explicit "confidential: false" declaration
- Skips false positive on documents marked non-confidential

Previously blocked: docs with "confidential:" even when set to false
Now allows: docs with explicit "confidential: false" in frontmatter

Fixes deployment blocking of BI documentation which is marked
for public release with confidential: false metadata.

Related: inst_012, inst_015 (confidential document protection)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-27 10:44:54 +13:00

171 lines
4.8 KiB
JavaScript
Executable file

#!/usr/bin/env node
/**
* Confidential Document Scanner - Enforces inst_012, inst_015
* Prevents deployment of internal/confidential documents
*/
const fs = require('fs');
const path = require('path');
// File patterns that indicate confidential/internal documents.
// Every entry is a case-insensitive, unanchored regex, so it matches anywhere in
// the string it is tested against. checkFilePath() tests each pattern against
// both the basename and the full path and reports the first match via
// `pattern.source`, so the order of this list decides which reason is shown.
// NOTE(review): scanFile() in this file intentionally skips the filename check,
// so these patterns only take effect where checkFilePath() is called directly.
const CONFIDENTIAL_PATTERNS = [
/session[-_]?handoff/i,
/phase[-_]?planning/i,
/cost[-_]?estimate/i,
/infrastructure[-_]?plan/i,
/progress[-_]?report/i,
/cover[-_]?letter/i,
/testing[-_]?checklist/i,
// The generic words below are broad: they match any path containing the word.
/internal/i,
/confidential/i,
/private/i,
/draft/i,
/wip[-_]/i, // work in progress; requires a trailing "-" or "_" after "wip"
/todo/i
];
// Content markers that indicate confidential information.
// checkFileContent() tests every marker against each of the first 20 lines of a
// file and reports the first hit (line number, trimmed text, `marker.source`).
// All markers are case-insensitive and unanchored.
const CONFIDENTIAL_CONTENT_MARKERS = [
// Bracketed tags, e.g. "[DRAFT]" anywhere in a line.
/\[INTERNAL\]/i,
/\[CONFIDENTIAL\]/i,
/\[DRAFT\]/i,
/\[DO NOT PUBLISH\]/i,
/\[WIP\]/i,
// Label-style prefixes. Because of the `i` flag, /CONFIDENTIAL:/ also matches a
// YAML `confidential:` key, which is why checkFileContent() looks for an
// explicit `confidential: false` in the frontmatter before scanning lines.
/CONFIDENTIAL:/i,
/INTERNAL ONLY:/i
];
/**
 * Checks a path against the filename patterns in CONFIDENTIAL_PATTERNS.
 *
 * Each pattern is tried against the basename and against the full path, in list
 * order; the first pattern that matches either one determines the result.
 *
 * @param {string} filePath - Path to classify.
 * @returns {{confidential: boolean, reason?: string}} `confidential: true` with
 *   a reason naming the matching pattern, or `{ confidential: false }`.
 */
function checkFilePath(filePath) {
  const fileName = path.basename(filePath);

  const matched = CONFIDENTIAL_PATTERNS.find(
    (candidate) => candidate.test(fileName) || candidate.test(filePath)
  );

  if (matched === undefined) {
    return { confidential: false };
  }

  return {
    confidential: true,
    reason: `Filename matches confidential pattern: ${matched.source}`
  };
}
/**
 * Inspects the opening lines of a file for confidential content markers.
 *
 * A document whose YAML frontmatter explicitly declares `confidential: false`
 * is treated as cleared for release and is not scanned any further. Otherwise
 * the first 20 lines are tested against CONFIDENTIAL_CONTENT_MARKERS and the
 * first hit is reported.
 *
 * @param {string} filePath - Path of the file to read (decoded as UTF-8).
 * @returns {{confidential: boolean, reason?: string, line?: number, text?: string}}
 *   `{ confidential: false }` when nothing is found, the file is explicitly
 *   marked non-confidential, or the file cannot be read; otherwise the marker
 *   hit with its 1-based line number and the trimmed line text.
 */
function checkFileContent(filePath) {
  let content;
  try {
    content = fs.readFileSync(filePath, 'utf8');
  } catch (err) {
    // Best effort: an unreadable path (directory, permissions, ...) is not
    // flagged, but say so instead of silently reporting it as clean.
    console.warn(`Warning: could not read ${filePath} (${err.message}); content scan skipped`);
    return { confidential: false };
  }

  // Drop a UTF-8 BOM and accept both LF and CRLF line endings; otherwise a
  // leading "---" fence is never recognised in files saved on Windows.
  const lines = content.replace(/^\uFEFF/, '').split(/\r?\n/);
  const isFence = (line) => line.trimEnd() === '---';

  // Check for YAML frontmatter: a fence on line 1 and a closing fence within
  // the first 50 lines.
  if (isFence(lines[0])) {
    const closingOffset = lines.slice(1, 50).findIndex(isFence);
    if (closingOffset >= 0) {
      const frontmatter = lines.slice(1, closingOffset + 1);
      // The declaration must be a real `confidential: false` key on its own
      // line (optionally followed by a YAML comment). Text inside a comment or
      // a longer key such as `not_confidential:` must not exempt the document.
      const nonConfidentialDeclaration = /^[ \t]*confidential:[ \t]*false[ \t]*(?:#.*)?$/i;
      if (frontmatter.some((line) => nonConfidentialDeclaration.test(line))) {
        // Explicitly marked as NOT confidential
        return { confidential: false };
      }
    }
  }

  // Only the first 20 lines are examined for markers.
  const head = lines.slice(0, 20);
  for (const [index, line] of head.entries()) {
    for (const marker of CONFIDENTIAL_CONTENT_MARKERS) {
      if (marker.test(line)) {
        return {
          confidential: true,
          reason: `Content contains confidential marker at line ${index + 1}: ${marker.source}`,
          line: index + 1,
          text: line.trim()
        };
      }
    }
  }

  return { confidential: false };
}
/**
 * Scans a single file and returns a finding when its content is confidential.
 *
 * Only document-type extensions are considered. The verdict comes from content
 * markers alone; filename patterns are deliberately not consulted, because
 * session handoffs and other internal docs are acceptable in docs/ as long as
 * they carry no sensitive content, and the filename patterns are broad enough
 * to catch legitimate internal documentation.
 *
 * @param {string} filePath - Path of the file to scan.
 * @returns {object|null} `{ file, ...contentResult }` for a confidential
 *   document; `null` for non-documents and for documents without markers.
 */
function scanFile(filePath) {
  const documentExtensions = new Set(['.md', '.txt', '.pdf', '.doc', '.docx', '.html']);
  const extension = path.extname(filePath).toLowerCase();

  // Skip non-document files
  if (!documentExtensions.has(extension)) {
    return null;
  }

  const verdict = checkFileContent(filePath);
  return verdict.confidential ? { file: filePath, ...verdict } : null;
}
/**
 * Command-line entry point.
 *
 * Reads file paths from `process.argv`, scans each existing file with
 * scanFile(), prints a report to stdout and terminates the process:
 * exit code 0 when no arguments were given (usage is printed) or nothing
 * confidential was found, exit code 1 when at least one finding exists.
 * Paths that do not exist are skipped.
 */
function main() {
  const args = process.argv.slice(2);

  if (args.length === 0) {
    console.log('Usage: check-confidential-docs.js <file1> [file2] ...');
    console.log('');
    console.log('Scans files to prevent deployment of internal/confidential documents');
    process.exit(0);
  }

  console.log(`\n🔍 Scanning ${args.length} file(s) for confidential markers...\n`);

  const findings = [];
  for (const file of args) {
    // A path that is not on disk cannot be deployed, so there is nothing to scan.
    if (!fs.existsSync(file)) continue;
    const result = scanFile(file);
    if (result) {
      findings.push(result);
    }
  }

  if (findings.length === 0) {
    console.log('✅ No confidential documents detected\n');
    process.exit(0);
  }

  // Report findings
  const divider = '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n';
  const previewLimit = 60;

  console.log(`❌ Found ${findings.length} confidential document(s):\n`);
  console.log(divider);
  for (const finding of findings) {
    console.log(`🔴 ${finding.file}`);
    console.log(` Reason: ${finding.reason}`);
    if (finding.text) {
      // Only show the ellipsis when the line really was cut short.
      const preview = finding.text.length > previewLimit
        ? `${finding.text.substring(0, previewLimit)}...`
        : finding.text;
      console.log(` Line ${finding.line}: ${preview}`);
    }
    console.log('');
  }
  console.log(divider);
  console.log('⚠️ DEPLOYMENT BLOCKED (inst_012/inst_015)\n');
  console.log('These documents are marked confidential/internal.');
  console.log('');
  console.log('Actions:');
  console.log(' 1. Remove confidential markers if approved for public release');
  console.log(' 2. Move to a non-public directory');
  console.log(' 3. Get explicit human approval before deploying\n');
  process.exit(1);
}
// Script entry point: run the scan as soon as this file is executed.
main();