- extractMetadata was not including the slug field from frontMatter
- Added slug: frontMatter.slug to the returned metadata object
- Now the slug check on line 144 will actually find the frontmatter slug
- Fixes glossary translation slugs (glossary-de, glossary-fr)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
393 lines
12 KiB
JavaScript
Executable file
393 lines
12 KiB
JavaScript
Executable file
#!/usr/bin/env node
|
|
/**
 * Document Migration Script
 * Migrates markdown documents into the MongoDB database
 *
 * Usage:
 *   npm run migrate:docs                                                # No --source: probes the default source locations
 *   node scripts/migrate-documents.js --source /path/to/docs --dry-run  # Preview only, nothing is written
 *   node scripts/migrate-documents.js --source /path/to/docs --force    # Overwrite documents whose slug already exists
 */
|
|
|
|
require('dotenv').config();
|
|
|
|
const fs = require('fs').promises;
|
|
const path = require('path');
|
|
const { connect, close } = require('../src/utils/db.util');
|
|
const Document = require('../src/models/Document.model');
|
|
const { markdownToHtml, extractTOC, generateSlug } = require('../src/utils/markdown.util');
|
|
const logger = require('../src/utils/logger.util');
|
|
|
|
// Command-line handling: drop the node binary and the script path, keep the rest.
const [, , ...args] = process.argv;

// Position of the `--source` flag inside `args` (-1 when the flag is absent).
const sourceArg = args.findIndex((arg) => arg === '--source');

// Boolean switches: enabled when the flag appears anywhere on the command line.
const dryRun = args.some((arg) => arg === '--dry-run');
const force = args.some((arg) => arg === '--force');

// Candidate source directories, listed in the order they should be tried.
const DEFAULT_SOURCES = [
  '/home/theflow/projects/tractatus/docs/markdown',
  '/home/theflow/projects/sydigital/stochastic/innovation-exploration/anthropic-submission',
];
|
|
|
|
/**
 * Split a markdown document into its front matter and its body.
 *
 * Front matter is a block at the very start of the text, delimited by two
 * `---` lines. Both LF and CRLF line endings are accepted, and the closing
 * delimiter may be the last thing in the file (no trailing newline needed).
 *
 * The block is read as flat `key: value` pairs, one per line. This is not a
 * YAML parser: nesting, lists and multi-line values are not interpreted.
 * Every value is kept as a string, trimmed, with a leading and/or trailing
 * quote character removed. Lines without a colon are ignored.
 *
 * @param {string} content - Raw markdown text.
 * @returns {{frontMatter: Object<string, string>, content: string}}
 *   The parsed pairs and the text after the closing delimiter. When no front
 *   matter block is found, `frontMatter` is empty and `content` is the input.
 */
function extractFrontMatter(content) {
  // `\r?\n` keeps CRLF files working; `(?:\r?\n|$)` lets the block end at EOF.
  const frontMatterRegex = /^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n|$)([\s\S]*)$/;
  const match = content.match(frontMatterRegex);

  if (!match) {
    return { frontMatter: {}, content };
  }

  const [, frontMatterText, remainingContent] = match;

  const frontMatter = {};
  for (const line of frontMatterText.split(/\r?\n/)) {
    // Only the first colon separates key from value, so values may contain colons.
    const separatorIndex = line.indexOf(':');
    if (separatorIndex <= 0) {
      continue; // no colon, or nothing before it
    }

    const key = line.slice(0, separatorIndex).trim();
    const value = line.slice(separatorIndex + 1).trim();
    frontMatter[key] = value.replace(/^["']|["']$/g, ''); // Remove quotes
  }

  return { frontMatter, content: remainingContent };
}
|
|
|
|
/**
 * Derive document metadata from the file name, the markdown body and the
 * parsed front matter.
 *
 * - identifier: first `AAA-BBB-0000` style code found in the file name, else null.
 * - quadrant:   looked up from the identifier's first segment (STR, OPS, TAC,
 *               SYS, STO); null for any other prefix or when there is no identifier.
 * - title:      front matter `title`, else the first `# ` heading, else the
 *               file name without its `.md` extension.
 * - version:    front matter `version` (default '1.0'); when the file carries
 *               an identifier and a `v<major>-<minor>` token (e.g.
 *               `...-v2-1.md`), that token wins and is reported as `2.1`.
 * - type:       front matter `type` (default 'governance'), overridden when
 *               the file name contains one of the known keywords.
 * - tags:       comma-separated front matter `tags`; a surrounding `[...]`,
 *               per-tag quotes and empty entries are dropped. An array is
 *               accepted as well.
 * - slug:       front matter `slug`, passed through (undefined when absent).
 *
 * @param {string} filename - Base name of the markdown file.
 * @param {string} content - Markdown body (front matter already removed).
 * @param {Object} [frontMatter={}] - Key/value pairs parsed from front matter.
 * @returns {{identifier: (string|null), title: string, type: string,
 *   quadrant: (string|null), version: string, author: string,
 *   tags: string[], status: string, slug: (string|undefined)}}
 */
function extractMetadata(filename, content, frontMatter = {}) {
  // Patterns: TRA-VAL-0001, STO-INN-0010, etc.
  const identifierMatch = filename.match(/([A-Z]{3}-[A-Z]{3}-\d{4})/);
  const identifier = identifierMatch ? identifierMatch[1] : null;

  // The first identifier segment selects the quadrant.
  const quadrantMap = {
    STR: 'strategic',
    OPS: 'operational',
    TAC: 'tactical',
    SYS: 'system',
    STO: 'stochastic',
  };
  const quadrant = identifier ? quadrantMap[identifier.split('-')[0]] || null : null;

  // Title: front matter first, then the first H1, then the bare file name.
  let title = frontMatter.title || null;
  if (!title) {
    const h1Match = content.match(/^#\s+(.+)$/m);
    title = h1Match ? h1Match[1] : path.basename(filename, '.md');
  }

  // Version: the identifier itself can never contain a `v1-0` token, so the
  // token is searched in the file name the identifier came from. The leading
  // separator avoids matching a stray "v" in the middle of a word.
  let version = frontMatter.version || '1.0';
  const versionMatch = identifier ? filename.match(/(?:^|[-_.])v(\d+)-(\d+)/) : null;
  if (versionMatch) {
    version = `${versionMatch[1]}.${versionMatch[2]}`;
  }

  // Type: the first file-name keyword that matches overrides front matter.
  const typeByKeyword = [
    ['technical-proposal', 'technical'],
    ['appendix', 'technical'],
    ['framework', 'framework'],
    ['whitepaper', 'research'],
    ['case-stud', 'case-study'],
  ];
  const keywordHit = typeByKeyword.find(([keyword]) => filename.includes(keyword));
  const type = keywordHit ? keywordHit[1] : frontMatter.type || 'governance';

  const author = frontMatter.author || 'System';

  // Tags: tolerate `[a, b]` flow-list syntax, quoted items and trailing commas.
  let rawTags = [];
  if (Array.isArray(frontMatter.tags)) {
    rawTags = frontMatter.tags;
  } else if (frontMatter.tags) {
    rawTags = String(frontMatter.tags).trim().replace(/^\[|\]$/g, '').split(',');
  }
  const tags = rawTags
    .map((tag) => String(tag).trim().replace(/^["']|["']$/g, '').trim())
    .filter((tag) => tag.length > 0);

  return {
    identifier,
    title,
    type,
    quadrant,
    version,
    author,
    tags,
    status: 'published',
    slug: frontMatter.slug, // Include slug from frontmatter if present
  };
}
|
|
|
|
/**
 * Read one markdown file and build the document object that gets stored.
 *
 * Steps: read the file, split off the front matter, derive metadata, render
 * HTML, extract the table of contents, choose a slug (front matter slug if
 * given, otherwise generated from the title), decide visibility and category,
 * then assemble the record.
 *
 * Visibility is resolved in this order:
 *   1. front matter `visibility`
 *   2. legacy front matter `public` (true / 'true' => public, else internal)
 *   3. 'internal' when the file name or slug contains an internal pattern,
 *      otherwise 'public'
 * Any value outside public/internal/confidential/archived is replaced with
 * 'internal' after a console warning.
 *
 * @param {string} filePath - Path of the markdown file to read.
 * @param {string} sourcePath - Migration root; `source_path` is stored relative to it.
 * @returns {Promise<Object>} The document object for this file.
 */
async function processMarkdownFile(filePath, sourcePath) {
  const filename = path.basename(filePath);
  const rawContent = await fs.readFile(filePath, 'utf-8');

  const { frontMatter, content } = extractFrontMatter(rawContent);
  const metadata = extractMetadata(filename, content, frontMatter);

  const htmlContent = markdownToHtml(content);
  const tableOfContents = extractTOC(content);

  // A slug supplied in front matter wins over one generated from the title.
  const slug = metadata.slug || generateSlug(metadata.title);

  // Name fragments that mark a document as internal (not for publication).
  const internalPatterns = [
    'session-handoff',
    'phase-2',
    'phase-3',
    'testing',
    'progress-report',
    'blog-post-outlines',
    'cost-estimates',
    'deployment-guide',
    'kickoff-checklist',
    'preparation-advisory',
    'soft-launch',
    'implementation-session',
    'test-suite',
  ];

  // True when the fragment occurs in the file name or, failing that, the slug.
  const mentions = (fragment) =>
    [filename, slug].some((text) => text.toLowerCase().includes(fragment));
  const isInternal = internalPatterns.some(mentions);

  let visibility;
  if (frontMatter.visibility) {
    visibility = frontMatter.visibility;
  } else if (frontMatter.public !== undefined) {
    // Legacy `public` field
    const isPublic = frontMatter.public === true || frontMatter.public === 'true';
    visibility = isPublic ? 'public' : 'internal';
  } else {
    visibility = isInternal ? 'internal' : 'public';
  }

  // Anything outside the allowed set falls back to the most restrictive default used here.
  const allowedVisibility = ['public', 'internal', 'confidential', 'archived'];
  if (!allowedVisibility.includes(visibility)) {
    console.warn(`Invalid visibility '${visibility}' for ${filename}, defaulting to 'internal'`);
    visibility = 'internal';
  }

  const category = frontMatter.category || metadata.type || 'none';

  // Document object matching the Document model schema
  return {
    title: metadata.title,
    slug,
    quadrant: metadata.quadrant,
    persistence: 'HIGH', // Default for technical documents
    visibility,
    category,
    content_html: htmlContent,
    content_markdown: content,
    toc: tableOfContents,
    metadata: {
      author: metadata.author,
      version: metadata.version,
      document_code: metadata.identifier,
      tags: metadata.tags,
      original_filename: filename,
      source_path: path.relative(sourcePath, filePath),
      migrated_at: new Date(),
    },
    search_index: content.toLowerCase(),
    translations: {},
    download_formats: {},
  };
}
|
|
|
|
/**
 * Recursively collect the markdown files below a directory.
 *
 * Rules:
 * - directories whose name starts with `.` and `node_modules` are not entered
 * - only regular files ending in `.md` are returned
 * - any file whose name contains "readme" (case-insensitive) is skipped
 *
 * Entries are visited in name order within each directory. The order returned
 * by `readdir` is not guaranteed, and the migration processes files in the
 * order returned here, so sorting keeps repeated runs consistent.
 *
 * @param {string} dirPath - Directory to scan.
 * @returns {Promise<string[]>} Paths of the matching files, each built by
 *   joining `dirPath` with the relative location of the file.
 */
async function findMarkdownFiles(dirPath) {
  const files = [];

  // Plain code-unit comparison: independent of the machine's locale.
  const byName = (a, b) => {
    if (a.name < b.name) return -1;
    if (a.name > b.name) return 1;
    return 0;
  };

  async function scan(dir) {
    const entries = await fs.readdir(dir, { withFileTypes: true });
    entries.sort(byName);

    for (const entry of entries) {
      const fullPath = path.join(dir, entry.name);

      if (entry.isDirectory()) {
        // Skip node_modules and hidden directories
        if (!entry.name.startsWith('.') && entry.name !== 'node_modules') {
          await scan(fullPath);
        }
      } else if (entry.isFile() && entry.name.endsWith('.md')) {
        // Skip README files
        if (!entry.name.toLowerCase().includes('readme')) {
          files.push(fullPath);
        }
      }
    }
  }

  await scan(dirPath);
  return files;
}
|
|
|
|
/**
 * Pick the directory to migrate from.
 *
 * Uses the value following `--source` when one was given; otherwise returns
 * the first entry of DEFAULT_SOURCES that is an existing, non-empty directory.
 *
 * @returns {Promise<string|undefined>} The chosen path, or undefined when
 *   nothing usable was found.
 */
async function resolveSourcePath() {
  if (sourceArg !== -1 && args[sourceArg + 1]) {
    return args[sourceArg + 1];
  }

  for (const defaultPath of DEFAULT_SOURCES) {
    try {
      const stat = await fs.stat(defaultPath);
      if (stat.isDirectory()) {
        const files = await fs.readdir(defaultPath);
        if (files.length > 0) {
          return defaultPath;
        }
      }
    } catch (err) {
      // Deliberate best effort: a default location that cannot be read
      // (typically because it does not exist) is skipped and the next is tried.
    }
  }

  return undefined;
}

/**
 * Main migration function
 *
 * Finds the markdown files under the source directory, converts each one to a
 * document object, and then:
 * - with `--dry-run`: prints what would be stored; the database is not opened
 * - otherwise: looks the document up by slug and creates it, skips it when it
 *   already exists, or updates it when `--force` was given
 *
 * A failure on one file is reported and counted, and the run continues with
 * the next file. A summary is printed at the end.
 *
 * Process exit: exits with code 1 when no source directory can be determined
 * or when the run as a whole fails, and with code 0 when the source holds no
 * markdown files. A failed run closes the database connection (if one was
 * opened) before exiting.
 *
 * @returns {Promise<void>}
 */
async function migrate() {
  // Only close what was actually opened.
  let connected = false;
  // Exiting is deferred until after cleanup; process.exit() inside `catch`
  // would end the process before `finally` could run.
  let failed = false;

  try {
    console.log('\n=== Tractatus Document Migration ===\n');

    // Determine source path
    const sourcePath = await resolveSourcePath();

    if (!sourcePath) {
      console.error('❌ No source path specified and no documents found in default locations.');
      console.log('\nUsage: npm run migrate:docs -- --source /path/to/docs');
      console.log('\nDefault locations checked:');
      DEFAULT_SOURCES.forEach(p => console.log(` - ${p}`));
      process.exit(1);
    }

    console.log(`📂 Source: ${sourcePath}`);
    console.log(`🔍 Mode: ${dryRun ? 'DRY RUN (no changes)' : 'MIGRATION (will write to database)'}`);
    console.log('');

    // Find markdown files
    const markdownFiles = await findMarkdownFiles(sourcePath);

    if (markdownFiles.length === 0) {
      console.log('⚠️ No markdown files found.');
      process.exit(0);
    }

    console.log(`Found ${markdownFiles.length} markdown file(s):\n`);
    markdownFiles.forEach((file, i) => {
      console.log(` ${i + 1}. ${path.relative(sourcePath, file)}`);
    });
    console.log('');

    if (!dryRun) {
      // Connect to database
      await connect();
      connected = true;
    }

    // Process each file
    let createdCount = 0;
    let updatedCount = 0;
    let skippedCount = 0;
    let errorsCount = 0;

    for (const filePath of markdownFiles) {
      try {
        const doc = await processMarkdownFile(filePath, sourcePath);
        const filename = path.basename(filePath);

        if (dryRun) {
          console.log(`✓ [DRY RUN] ${filename}`);
          console.log(` Title: ${doc.title}`);
          console.log(` Slug: ${doc.slug}`);
          console.log(` Quadrant: ${doc.quadrant || 'none'}`);
          console.log(` Code: ${doc.metadata.document_code || 'none'}`);
          console.log('');
          // In a dry run, "created" counts the files that were processed successfully.
          createdCount++;
        } else {
          // Check if document already exists by slug
          const existing = await Document.findBySlug(doc.slug);

          if (existing && !force) {
            console.log(`⊘ SKIPPED ${filename} (already exists: ${existing.slug})`);
            skippedCount++;
          } else if (existing && force) {
            // Update existing document
            const updatedDoc = await Document.update(existing._id, doc);
            console.log(`↻ UPDATED ${filename} (${updatedDoc.slug})`);
            updatedCount++;
          } else {
            // Create new document
            const createdDoc = await Document.create(doc);
            console.log(`✓ CREATED ${filename} (${createdDoc.slug})`);
            createdCount++;
          }
        }
      } catch (error) {
        // One bad file must not abort the whole run.
        console.error(`✗ ERROR processing ${path.basename(filePath)}: ${error.message}`);
        logger.error(`Migration error for ${filePath}:`, error);
        errorsCount++;
      }
    }

    // Summary
    console.log('\n=== Migration Summary ===\n');
    console.log(` Total files: ${markdownFiles.length}`);
    console.log(` Created: ${createdCount}`);
    console.log(` Updated: ${updatedCount}`);
    console.log(` Skipped: ${skippedCount}`);
    console.log(` Errors: ${errorsCount}`);
    console.log('');

    if (dryRun) {
      console.log('💡 This was a dry run. No changes were made.');
      console.log(' Run without --dry-run to perform actual migration.');
    }

    if (!dryRun) {
      logger.info(`Document migration completed: ${createdCount} created, ${updatedCount} updated, ${skippedCount} skipped, ${errorsCount} errors`);
    }
  } catch (error) {
    console.error('\n❌ Migration failed:', error.message);
    logger.error('Migration error:', error);
    failed = true;
  } finally {
    if (connected) {
      try {
        await close();
      } catch (closeError) {
        // Report instead of rejecting, so the original failure is not masked.
        console.error('\n❌ Failed to close database connection:', closeError.message);
        logger.error('Database close error:', closeError);
        failed = true;
      }
    }
  }

  if (failed) {
    process.exit(1);
  }
}
|
|
|
|
// Run if called directly (not when this file is loaded with require()).
if (require.main === module) {
  // Do not leave the promise floating: a rejection (for example from cleanup
  // in migrate's `finally`) is reported and turned into a failing exit code.
  migrate().catch((error) => {
    console.error('\n❌ Migration failed:', error.message);
    process.exit(1);
  });
}

// Exported so the migration can also be started from other code.
module.exports = migrate;
|