#!/usr/bin/env node

/**
 * Import Outreach Articles Script
 *
 * Imports existing articles from docs/outreach/ into database
 * Creates BlogPost and SubmissionTracking records
 * Runs content analysis on each article
 */

const fs = require('fs').promises;
const path = require('path');
const mongoose = require('mongoose');
require('dotenv').config();

const BlogPost = require('../src/models/BlogPost.model');
const SubmissionTracking = require('../src/models/SubmissionTracking.model');
const User = require('../src/models/User.model');
const publicationConfig = require('../src/config/publication-targets.config');
const { getInstance: getContentAnalyzer } = require('../src/services/ContentAnalyzer.service');
const logger = require('../src/utils/logger.util');

// MongoDB connection
const MONGODB_URI = process.env.MONGODB_URI || 'mongodb://localhost:27017/tractatus_dev';

// Lines that open the submission metadata section of an article file.
const METADATA_HEADERS = ['**SUBMISSION METADATA**', '**SUBMISSION METADATA:**'];

// Metadata lines whose value is simply "everything after the bold label".
const SIMPLE_METADATA_FIELDS = [
  ['**Title:**', 'title'], // overrides the markdown "# " title when present
  ['**Subtitle:**', 'subtitle'],
  ['**Format:**', 'format'],
  ['**Category:**', 'category'],
  ['**Target Section:**', 'targetSection'],
  ['**Primary Contact:**', 'primaryContact'],
  ['**Submit to:**', 'submissionEmail']
];

// The article body ends at the first line containing any of these markers.
const BODY_END_MARKERS = [
  '## SUPPORTING MATERIALS',
  '## PITCH LETTER',
  '## SUBMISSION NOTES',
  '**SUPPORTING MATERIALS',
  '**SUBMISSION STRATEGY',
  '**SUBMISSION NOTES',
  '**END OF ARTICLE**'
];

// Markdown files whose name contains one of these are not articles.
const EXCLUDED_FILENAME_MARKERS = [
  'STRATEGY',
  'INSTRUCTIONS',
  'SUMMARY',
  'ANALYSIS',
  'RECRUITMENT',
  'PRESENTATION'
];

const EMAIL_PATTERN = /([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/;

// Bodies shorter than this are treated as "no content found".
const MIN_BODY_LENGTH = 100;

/**
 * Count whitespace-separated words in a piece of text.
 * @param {string} text
 * @returns {number}
 */
function countWords(text) {
  return text.trim().split(/\s+/).filter(Boolean).length;
}

/**
 * Parse the first number on a "**Word Count:**" line.
 * Thousands separators are accepted, so "1,200 words" yields 1200.
 * @param {string} line
 * @returns {number|null} The parsed count, or null when the line has no digits.
 */
function parseWordCount(line) {
  const match = line.match(/\d[\d,]*/);
  return match ? Number.parseInt(match[0].replace(/,/g, ''), 10) : null;
}

/**
 * Apply a single (already trimmed) metadata line to the metadata object.
 * Lines that match no known label are ignored.
 * @param {string} line
 * @param {object} metadata - Mutated in place.
 */
function applyMetadataLine(line, metadata) {
  if (line.startsWith('**Word Count:**')) {
    metadata.wordCount = parseWordCount(line);
    return;
  }

  if (line.startsWith('**Contact:**')) {
    // Only a fallback: an explicit "Submit to" address already seen wins.
    const emailMatch = line.match(EMAIL_PATTERN);
    if (emailMatch && !metadata.submissionEmail) {
      metadata.submissionEmail = emailMatch[1];
    }
    return;
  }

  const field = SIMPLE_METADATA_FIELDS.find(([label]) => line.startsWith(label));
  if (field) {
    const [label, key] = field;
    metadata[key] = line.slice(label.length).trim();
  }
}

/**
 * Collect the article body lines starting at `bodyStart`.
 * Leading separators ("---") and blank lines are skipped, and collection
 * stops at the first line containing a body end marker.
 * @param {string[]} lines
 * @param {number} bodyStart
 * @returns {string[]}
 */
function extractBodyLines(lines, bodyStart) {
  const bodyLines = [];
  let startedContent = false;

  for (let i = bodyStart; i < lines.length; i++) {
    const line = lines[i];

    if (BODY_END_MARKERS.some((marker) => line.includes(marker))) {
      break;
    }

    if (!startedContent) {
      const trimmed = line.trim();
      if (trimmed === '---' || trimmed === '') {
        continue;
      }
      startedContent = true;
    }

    bodyLines.push(line);
  }

  return bodyLines;
}

/**
 * Map a free-text "Format" value to a content type.
 * @param {string|null} format
 * @returns {string|null} 'letter', 'oped', 'essay', or null when nothing matches.
 */
function inferContentType(format) {
  if (!format) {
    return null;
  }

  const formatLower = format.toLowerCase();
  if (formatLower.includes('letter')) {
    return 'letter';
  }
  if (formatLower.includes('op-ed') || formatLower.includes('opinion')) {
    return 'oped';
  }
  if (formatLower.includes('essay') || formatLower.includes('article')) {
    return 'essay';
  }
  return null;
}

/**
 * Derive the target publication id from the file name.
 * @param {string} filename - File name without the .md extension.
 * @returns {string|null}
 */
function inferTargetPublication(filename) {
  if (filename.includes('Economist')) {
    return filename.includes('Letter') ? 'economist-letter' : 'economist-article';
  }
  if (filename.includes('NYT')) {
    return 'nyt-oped';
  }
  return null;
}

/**
 * Minimal analysis object used when content analysis is unavailable.
 * @returns {object}
 */
function createFallbackAnalysis() {
  return {
    tone: { primary: 'unknown', confidence: 0 },
    audience: { primary: 'unknown', confidence: 0 },
    themes: [],
    tractatus: {
      frameworkAlignment: 0,
      quadrant: 'OPERATIONAL',
      valuesSensitive: false
    }
  };
}

class ArticleImporter {
  constructor() {
    this.contentAnalyzer = null;
    this.adminUser = null;
    this.stats = {
      processed: 0,
      imported: 0,
      skipped: 0,
      errors: 0
    };
  }

  /**
   * Connect to MongoDB
   * @throws Rethrows the connection error after logging it.
   */
  async connect() {
    try {
      await mongoose.connect(MONGODB_URI);
      logger.info('✅ Connected to MongoDB');
    } catch (error) {
      logger.error('❌ MongoDB connection error:', error);
      throw error;
    }
  }

  /**
   * Initialize services and get admin user
   * @throws {Error} When no user with role 'admin' exists.
   */
  async initialize() {
    // Get admin user for createdBy field
    const users = await User.list();
    this.adminUser = users.find((u) => u.role === 'admin');

    if (!this.adminUser) {
      throw new Error('No admin user found. Please create an admin user first.');
    }

    logger.info(`Using admin user: ${this.adminUser.email}`);

    // Initialize content analyzer
    this.contentAnalyzer = getContentAnalyzer();
  }

  /**
   * Parse markdown file to extract metadata and content
   * @param {string} filePath - Path of the markdown file to read.
   * @returns {Promise<{metadata: object, body: string, filename: string}>}
   *   `filename` is the base name without the .md extension.
   */
  async parseMarkdownFile(filePath) {
    const content = await fs.readFile(filePath, 'utf-8');
    // Accept both LF and CRLF so body lines never carry a stray "\r".
    const lines = content.split(/\r?\n/);
    const filename = path.basename(filePath, '.md');

    const metadata = {
      title: null,
      subtitle: null,
      wordCount: null,
      format: null,
      contentType: null,
      category: null,
      targetPublication: null,
      targetSection: null,
      primaryContact: null,
      submissionEmail: null,
      status: 'drafted'
    };

    // Check for markdown header title
    const firstLine = lines[0].trim();
    if (firstLine.startsWith('# ')) {
      metadata.title = firstLine.replace(/^#\s+/, '');
    }

    // Parse metadata section: it runs from the header line to the next "---".
    // Without a closing "---" the body is taken from the top of the file.
    let bodyStart = 0;
    let inMetadata = false;

    for (let i = 0; i < lines.length; i++) {
      const line = lines[i].trim();

      if (METADATA_HEADERS.includes(line)) {
        inMetadata = true;
        continue;
      }

      if (!inMetadata) {
        continue;
      }

      if (line === '---') {
        bodyStart = i + 1;
        break;
      }

      applyMetadataLine(line, metadata);
    }

    // Extract main article body (skip metadata and supporting materials)
    const bodyLines = extractBodyLines(lines, bodyStart);
    const body = bodyLines.join('\n').trim();

    // Debug logging
    if (body.length < MIN_BODY_LENGTH) {
      logger.warn(`Body extraction debug for ${filename}:`);
      logger.warn(` bodyStart: ${bodyStart}`);
      logger.warn(` bodyLines count: ${bodyLines.length}`);
      logger.warn(` body length: ${body.length}`);
      logger.warn(` First 5 bodyLines:`, bodyLines.slice(0, 5));
    }

    // Determine content type from the Format field
    metadata.contentType = inferContentType(metadata.format);

    // Extract target publication from filename
    metadata.targetPublication = inferTargetPublication(filename);

    return { metadata, body, filename };
  }

  /**
   * Generate slug from title
   * @param {string} title
   * @returns {string} Lower-case title with runs of non [a-z0-9] characters
   *   collapsed to "-" and leading/trailing "-" removed.
   */
  generateSlug(title) {
    return title
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, '-')
      .replace(/^-+|-+$/g, '');
  }

  /**
   * Import single article
   *
   * Errors are logged and counted in `stats.errors`; they are not rethrown.
   *
   * @param {string} filePath - Path of the markdown article.
   * @returns {Promise<{blogPost: object, submissionTracking: object|null, analysis: object}|null>}
   *   The created records, or null when the article was skipped or failed.
   */
  async importArticle(filePath) {
    logger.info(`\n📄 Processing: ${path.basename(filePath)}`);

    try {
      // Parse markdown file
      const { metadata, body, filename } = await this.parseMarkdownFile(filePath);

      if (!metadata.title) {
        logger.warn(`⚠️ Skipping ${filename}: No title found`);
        this.stats.skipped++;
        return null;
      }

      if (!body || body.length < MIN_BODY_LENGTH) {
        logger.warn(`⚠️ Skipping ${filename}: No content body found`);
        this.stats.skipped++;
        return null;
      }

      // `||` is deliberate: a declared count of 0 is treated as "not declared".
      const wordCount = metadata.wordCount || countWords(body);

      logger.info(` Title: ${metadata.title}`);
      logger.info(` Word Count: ${wordCount}`);
      logger.info(` Type: ${metadata.contentType || 'unknown'}`);
      logger.info(` Target: ${metadata.targetPublication || 'none'}`);

      // Check if already imported
      const slug = this.generateSlug(metadata.title);
      const existing = await BlogPost.findBySlug(slug);

      if (existing) {
        logger.warn(` ⚠️ Already exists with slug: ${slug}`);
        this.stats.skipped++;
        return null;
      }

      // Run content analysis (optional - don't block import if analysis fails)
      logger.info(' 🔍 Running content analysis...');
      let analysis = null;

      try {
        analysis = await this.contentAnalyzer.analyzeArticle({
          title: metadata.title,
          content: body,
          wordCount,
          targetPublication: metadata.targetPublication
        });

        // An empty result is handled like a failure so the import continues.
        if (!analysis) {
          throw new Error('Content analyzer returned no result');
        }
      } catch (analysisError) {
        logger.warn(` ⚠️ Content analysis failed: ${analysisError.message}`);
        logger.warn(' ⚠️ Continuing with import without analysis...');

        // Create minimal analysis object
        analysis = createFallbackAnalysis();
      }

      // Create BlogPost
      const now = new Date();
      const blogPost = await BlogPost.create({
        title: metadata.title,
        slug,
        author: {
          type: 'human',
          name: 'John Stroh'
        },
        content: body,
        excerpt: metadata.subtitle || `${body.substring(0, 200)}...`,
        status: 'published', // Mark as published since these are completed articles
        moderation: {
          ai_analysis: JSON.stringify(analysis),
          human_reviewer: this.adminUser._id,
          review_notes: 'Imported from existing outreach materials',
          approved_at: now
        },
        tractatus_classification: {
          quadrant: analysis.tractatus?.quadrant || 'OPERATIONAL',
          values_sensitive: analysis.tractatus?.valuesSensitive || false,
          requires_strategic_review: false
        },
        published_at: now,
        tags: this._extractTags(analysis, metadata)
      });

      logger.info(` ✅ BlogPost created: ${blogPost._id}`);

      // Create SubmissionTracking if target publication specified
      // NOTE(review): if this step throws, the BlogPost created above stays in
      // the database and later runs skip the article as "already exists", so
      // its SubmissionTracking record is never created. Consider a transaction
      // or a compensating delete once the model API for that is confirmed.
      let submissionTracking = null;

      if (metadata.targetPublication) {
        const publication = publicationConfig.getPublicationById(metadata.targetPublication);

        if (publication) {
          submissionTracking = await SubmissionTracking.create({
            blogPostId: blogPost._id,
            publicationId: metadata.targetPublication,
            publicationName: publication.name,
            title: metadata.title,
            wordCount,
            contentType: metadata.contentType || this._mapPublicationType(publication.type),
            status: 'ready', // These are ready to submit
            submissionMethod: metadata.submissionEmail ? 'email' : 'form',
            submissionEmail: metadata.submissionEmail,
            editorContact: metadata.primaryContact,
            expectedResponseDays: publication.requirements?.responseTime || null,
            notes: [{
              content: `Imported from ${filename}. Original metadata preserved.`,
              author: this.adminUser._id,
              createdAt: new Date()
            }],
            createdBy: this.adminUser._id,
            lastUpdatedBy: this.adminUser._id
          });

          logger.info(` ✅ SubmissionTracking created: ${submissionTracking._id}`);
        }
      }

      // Log analysis summary
      const tonePercent = Math.round((analysis.tone?.confidence || 0) * 100);
      const audiencePercent = Math.round((analysis.audience?.confidence || 0) * 100);

      logger.info(' 📊 Analysis Summary:');
      logger.info(` Tone: ${analysis.tone?.primary} (${tonePercent}%)`);
      logger.info(` Audience: ${analysis.audience?.primary} (${audiencePercent}%)`);

      if (metadata.targetPublication && analysis.publicationFit) {
        logger.info(` Publication Fit: ${analysis.publicationFit.score}/10`);

        if (analysis.publicationFit.score < 7) {
          logger.warn(' ⚠️ Low fit score - recommendations:');
          analysis.publicationFit.recommendations?.forEach((rec) => {
            logger.warn(` - ${rec}`);
          });
        }
      }

      this.stats.imported++;

      return { blogPost, submissionTracking, analysis };
    } catch (error) {
      logger.error(` ❌ Error importing ${path.basename(filePath)}:`, error);
      this.stats.errors++;
      return null;
    }
  }

  /**
   * Import all articles from directory
   *
   * Articles are imported one at a time, in the order readdir returns them.
   *
   * @param {string} dirPath - Directory containing the markdown articles.
   * @returns {Promise<object[]>} One entry per imported article:
   *   `{ file, blogPost, submissionTracking, analysis }`.
   */
  async importDirectory(dirPath) {
    logger.info(`\n🚀 Starting import from: ${dirPath}\n`);

    const files = await fs.readdir(dirPath);
    const markdownFiles = files.filter((file) =>
      file.endsWith('.md') &&
      !EXCLUDED_FILENAME_MARKERS.some((marker) => file.includes(marker))
    );

    logger.info(`Found ${markdownFiles.length} article files to process\n`);

    const results = [];

    for (const file of markdownFiles) {
      this.stats.processed++;
      const result = await this.importArticle(path.join(dirPath, file));

      if (result) {
        results.push({ file, ...result });
      }
    }

    return results;
  }

  /**
   * Extract tags from analysis and metadata
   *
   * Always includes 'imported' and 'outreach', then the target publication
   * and content type when set, then up to three analysis themes.
   *
   * @param {object} analysis - Analysis result; `themes` is optional.
   * @param {object} metadata - Parsed article metadata.
   * @returns {string[]} De-duplicated tags.
   */
  _extractTags(analysis, metadata) {
    const tags = ['imported', 'outreach'];

    if (metadata.targetPublication) {
      tags.push(metadata.targetPublication);
    }

    if (metadata.contentType) {
      tags.push(metadata.contentType);
    }

    // Themes are read as `{ theme: string }`; plain strings are accepted too,
    // and entries without a usable label are skipped instead of throwing.
    const themes = Array.isArray(analysis?.themes) ? analysis.themes : [];
    themes.slice(0, 3).forEach((entry) => {
      const label = typeof entry === 'string' ? entry : entry?.theme;
      if (typeof label === 'string' && label !== '') {
        tags.push(label.toLowerCase().replace(/\s+/g, '-'));
      }
    });

    return [...new Set(tags)]; // Remove duplicates
  }

  /**
   * Map publication type to content type
   * @param {string} pubType
   * @returns {string} The matching content type, or 'essay' for unknown types.
   */
  _mapPublicationType(pubType) {
    const mapping = {
      letter: 'letter',
      oped: 'oped',
      essay: 'essay',
      social: 'social'
    };
    return Object.hasOwn(mapping, pubType) ? mapping[pubType] : 'essay';
  }

  /**
   * Print summary
   * @param {object[]} results - Entries returned by importDirectory.
   */
  printSummary(results) {
    const rule = '='.repeat(60);

    logger.info(`\n${rule}`);
    logger.info(' IMPORT SUMMARY');
    logger.info(`${rule}\n`);

    logger.info(` Processed: ${this.stats.processed}`);
    logger.info(` Imported: ${this.stats.imported}`);
    logger.info(` Skipped: ${this.stats.skipped}`);
    logger.info(` Errors: ${this.stats.errors}\n`);

    if (results.length > 0) {
      logger.info(' Imported Articles:\n');

      results.forEach((result) => {
        logger.info(` ✓ ${result.blogPost.title}`);
        logger.info(` - BlogPost ID: ${result.blogPost._id}`);

        if (result.submissionTracking) {
          logger.info(` - Submission ID: ${result.submissionTracking._id}`);
          logger.info(` - Target: ${result.submissionTracking.publicationName}`);
        }

        if (result.analysis && result.analysis.publicationFit) {
          logger.info(` - Fit Score: ${result.analysis.publicationFit.score}/10`);
        }

        logger.info('');
      });
    }

    logger.info(`${rule}\n`);
  }

  /**
   * Close connections
   */
  async close() {
    await mongoose.connection.close();
    logger.info('✅ Disconnected from MongoDB');
  }
}

/**
 * Main execution: connect, import docs/outreach, print the summary, then
 * exit with code 0 on success or 1 on any fatal error.
 */
async function main() {
  const importer = new ArticleImporter();
  let exitCode = 0;

  try {
    // Connect to database
    await importer.connect();

    // Initialize services
    await importer.initialize();

    // Import articles
    const outreachDir = path.join(__dirname, '../docs/outreach');
    const results = await importer.importDirectory(outreachDir);

    // Print summary
    importer.printSummary(results);
  } catch (error) {
    logger.error('Fatal error:', error);
    exitCode = 1;
  } finally {
    // Close connections on every path; a failing close must not leave an
    // unhandled rejection or hide the exit code.
    try {
      await importer.close();
    } catch (closeError) {
      logger.error('Error closing MongoDB connection:', closeError);
      exitCode = 1;
    }
  }

  process.exit(exitCode);
}

// Run if called directly
if (require.main === module) {
  main();
}

module.exports = ArticleImporter;