tractatus/scripts/import-outreach-articles.js
TheFlow ac2db33732 fix(submissions): restructure Economist package and fix article display
- Create Economist SubmissionTracking package correctly:
  * mainArticle = full blog post content
  * coverLetter = 216-word SIR— letter
  * Links to blog post via blogPostId
- Archive 'Letter to The Economist' from blog posts (it's the cover letter)
- Fix date display on article cards (use published_at)
- Target publication already displaying via blue badge

Database changes:
- Make blogPostId optional in SubmissionTracking model
- Economist package ID: 68fa85ae49d4900e7f2ecd83
- Le Monde package ID: 68fa2abd2e6acd5691932150

Next: Enhanced modal with tabs, validation, export

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-24 08:47:42 +13:00

511 lines
16 KiB
JavaScript
Executable file

#!/usr/bin/env node
/**
* Import Outreach Articles Script
* Imports existing articles from docs/outreach/ into database
* Creates BlogPost and SubmissionTracking records
* Runs content analysis on each article
*/
const fs = require('fs').promises;
const path = require('path');
const mongoose = require('mongoose');
require('dotenv').config();
const BlogPost = require('../src/models/BlogPost.model');
const SubmissionTracking = require('../src/models/SubmissionTracking.model');
const User = require('../src/models/User.model');
const publicationConfig = require('../src/config/publication-targets.config');
const { getInstance: getContentAnalyzer } = require('../src/services/ContentAnalyzer.service');
const logger = require('../src/utils/logger.util');
// MongoDB connection string: taken from the environment when set (and non-empty),
// otherwise the local development database is used.
const DEFAULT_MONGODB_URI = 'mongodb://localhost:27017/tractatus_dev';
const MONGODB_URI = process.env.MONGODB_URI || DEFAULT_MONGODB_URI;
/**
 * Imports outreach articles (markdown files) into the database.
 *
 * For each article a BlogPost record is created and, when a target publication
 * can be derived from the filename and is known to the publication config,
 * a SubmissionTracking record as well. Counters for the run are kept in
 * `this.stats`.
 */
class ArticleImporter {
  /**
   * Create an importer with no analyzer, no admin user and zeroed counters.
   * `connect()` and `initialize()` populate the rest.
   */
  constructor() {
    this.contentAnalyzer = null;
    this.adminUser = null;
    this.stats = {
      processed: 0,
      imported: 0,
      skipped: 0,
      errors: 0
    };
  }

  /**
   * Connect to MongoDB using the module-level MONGODB_URI.
   *
   * @returns {Promise<void>}
   * @throws Rethrows the connection error after logging it.
   */
  async connect() {
    try {
      await mongoose.connect(MONGODB_URI);
      logger.info('✅ Connected to MongoDB');
    } catch (error) {
      logger.error('❌ MongoDB connection error:', error);
      throw error;
    }
  }

  /**
   * Initialize services and get admin user.
   *
   * The first user with role 'admin' returned by User.list() is used for the
   * reviewer/createdBy fields of the imported records.
   *
   * @returns {Promise<void>}
   * @throws {Error} When no admin user exists.
   */
  async initialize() {
    const users = await User.list();
    this.adminUser = users.find((user) => user.role === 'admin');
    if (!this.adminUser) {
      throw new Error('No admin user found. Please create an admin user first.');
    }
    logger.info(`Using admin user: ${this.adminUser.email}`);

    this.contentAnalyzer = getContentAnalyzer();
  }

  /**
   * Parse markdown file to extract metadata and content.
   *
   * @param {string} filePath - Path to a `.md` file.
   * @returns {Promise<{metadata: object, body: string, filename: string}>}
   *   `filename` is the base name without the `.md` extension.
   */
  async parseMarkdownFile(filePath) {
    const content = await fs.readFile(filePath, 'utf-8');
    const filename = path.basename(filePath, '.md');
    return this._parseMarkdownContent(content, filename);
  }

  /**
   * Pure parsing step behind parseMarkdownFile (no file access).
   *
   * @param {string} content - Full markdown text.
   * @param {string} filename - Base name used to derive the target publication.
   * @returns {{metadata: object, body: string, filename: string}}
   */
  _parseMarkdownContent(content, filename) {
    // Accept both LF and CRLF so body lines do not keep a trailing '\r'.
    const lines = content.split(/\r?\n/);
    const metadata = {
      title: null,
      subtitle: null,
      wordCount: null,
      format: null,
      contentType: null,
      category: null,
      targetPublication: null,
      targetSection: null,
      primaryContact: null,
      submissionEmail: null,
      status: 'drafted'
    };

    // A leading "# Heading" is the default title; an explicit **Title:** field
    // in the metadata section overrides it below.
    const firstLine = lines[0].trim();
    if (firstLine.startsWith('# ')) {
      metadata.title = firstLine.replace(/^#\s+/, '');
    }

    // Fields whose value is simply the rest of the line after the label.
    const textFields = [
      ['**Title:**', 'title'],
      ['**Subtitle:**', 'subtitle'],
      ['**Format:**', 'format'],
      ['**Category:**', 'category'],
      ['**Target Section:**', 'targetSection'],
      ['**Primary Contact:**', 'primaryContact'],
      ['**Submit to:**', 'submissionEmail']
    ];

    // The metadata section runs from the SUBMISSION METADATA marker to the
    // next '---' line. If no such section (or terminator) is found, bodyStart
    // stays 0 and the whole file is scanned as body.
    let bodyStart = 0;
    let inMetadata = false;
    for (let i = 0; i < lines.length; i++) {
      const line = lines[i].trim();
      if (line === '**SUBMISSION METADATA**' || line === '**SUBMISSION METADATA:**') {
        inMetadata = true;
        continue;
      }
      if (!inMetadata) {
        continue;
      }
      if (line === '---') {
        bodyStart = i + 1;
        break;
      }

      const textField = textFields.find(([label]) => line.startsWith(label));
      if (textField) {
        const [label, key] = textField;
        metadata[key] = line.slice(label.length).trim();
      } else if (line.startsWith('**Word Count:**')) {
        // Take the first number on the line; allow thousands separators so
        // "1,046 words" is read as 1046 rather than 1.
        const match = line.match(/\d{1,3}(?:,\d{3})+|\d+/);
        metadata.wordCount = match
          ? Number.parseInt(match[0].replace(/,/g, ''), 10)
          : null;
      } else if (line.startsWith('**Contact:**')) {
        // Only used as a fallback: an explicit "Submit to" value wins.
        const emailMatch = line.match(/([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/);
        if (emailMatch && !metadata.submissionEmail) {
          metadata.submissionEmail = emailMatch[1];
        }
      }
    }

    // Extract main article body (skip metadata and supporting materials).
    const stopMarkers = [
      '## SUPPORTING MATERIALS',
      '## PITCH LETTER',
      '## SUBMISSION NOTES',
      '**SUPPORTING MATERIALS',
      '**SUBMISSION STRATEGY',
      '**SUBMISSION NOTES',
      '**END OF ARTICLE**'
    ];
    const bodyLines = [];
    let startedContent = false;
    for (let i = bodyStart; i < lines.length; i++) {
      const line = lines[i];
      if (stopMarkers.some((marker) => line.includes(marker))) {
        break;
      }
      // Skip separators and empty lines until real content starts.
      if (!startedContent) {
        const trimmed = line.trim();
        if (trimmed === '---' || trimmed === '') {
          continue;
        }
        startedContent = true;
      }
      bodyLines.push(line);
    }
    const body = bodyLines.join('\n').trim();

    // Debug logging for suspiciously short bodies (these are skipped by importArticle).
    if (body.length < 100) {
      logger.warn(`Body extraction debug for ${filename}:`);
      logger.warn(` bodyStart: ${bodyStart}`);
      logger.warn(` bodyLines count: ${bodyLines.length}`);
      logger.warn(` body length: ${body.length}`);
      logger.warn(` First 5 bodyLines:`, bodyLines.slice(0, 5));
    }

    // Determine content type from the Format field.
    if (metadata.format) {
      const formatLower = metadata.format.toLowerCase();
      if (formatLower.includes('letter')) {
        metadata.contentType = 'letter';
      } else if (formatLower.includes('op-ed') || formatLower.includes('opinion')) {
        metadata.contentType = 'oped';
      } else if (formatLower.includes('essay') || formatLower.includes('article')) {
        metadata.contentType = 'essay';
      }
    }

    // Extract target publication from filename.
    if (filename.includes('Economist')) {
      metadata.targetPublication = filename.includes('Letter') ? 'economist-letter' : 'economist-article';
    } else if (filename.includes('NYT')) {
      metadata.targetPublication = 'nyt-oped';
    }

    return {
      metadata,
      body,
      filename
    };
  }

  /**
   * Generate slug from title: lowercase, runs of characters outside a-z0-9
   * collapsed to '-', leading/trailing '-' removed.
   *
   * @param {string} title
   * @returns {string}
   */
  generateSlug(title) {
    return title
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, '-')
      .replace(/^-+|-+$/g, '');
  }

  /**
   * Run content analysis; never blocks the import.
   *
   * @param {{title: string, content: string, wordCount: number, targetPublication: (string|null)}} article
   * @returns {Promise<object>} The analyzer result, or a minimal placeholder
   *   when the analyzer throws or resolves to nothing.
   */
  async _analyzeOrFallback(article) {
    try {
      const analysis = await this.contentAnalyzer.analyzeArticle(article);
      if (analysis) {
        return analysis;
      }
      // A null/undefined result would otherwise crash on analysis.tractatus later.
      logger.warn(` ⚠️ Content analysis failed: analyzer returned no result`);
    } catch (analysisError) {
      logger.warn(` ⚠️ Content analysis failed: ${analysisError.message}`);
    }
    logger.warn(` ⚠️ Continuing with import without analysis...`);
    return {
      tone: { primary: 'unknown', confidence: 0 },
      audience: { primary: 'unknown', confidence: 0 },
      themes: [],
      tractatus: { frameworkAlignment: 0, quadrant: 'OPERATIONAL', valuesSensitive: false }
    };
  }

  /**
   * Import single article.
   *
   * Skips (and counts as skipped) files without a title, with a body shorter
   * than 100 characters, or whose slug already exists. Any error is logged and
   * counted in `stats.errors`; it is not rethrown.
   *
   * @param {string} filePath - Path to the markdown file.
   * @returns {Promise<({blogPost: object, submissionTracking: (object|null), analysis: object}|null)>}
   *   The created records, or null when skipped or failed.
   */
  async importArticle(filePath) {
    logger.info(`\n📄 Processing: ${path.basename(filePath)}`);
    try {
      const { metadata, body, filename } = await this.parseMarkdownFile(filePath);
      if (!metadata.title) {
        logger.warn(`⚠️ Skipping ${filename}: No title found`);
        this.stats.skipped++;
        return null;
      }
      if (!body || body.length < 100) {
        logger.warn(`⚠️ Skipping ${filename}: No content body found`);
        this.stats.skipped++;
        return null;
      }

      // Declared word count wins; otherwise count whitespace-separated tokens.
      const wordCount = metadata.wordCount || body.split(/\s+/).length;

      logger.info(` Title: ${metadata.title}`);
      logger.info(` Word Count: ${wordCount}`);
      logger.info(` Type: ${metadata.contentType || 'unknown'}`);
      logger.info(` Target: ${metadata.targetPublication || 'none'}`);

      // Check if already imported.
      const slug = this.generateSlug(metadata.title);
      const existing = await BlogPost.findBySlug(slug);
      if (existing) {
        logger.warn(` ⚠️ Already exists with slug: ${slug}`);
        this.stats.skipped++;
        return null;
      }

      logger.info(` 🔍 Running content analysis...`);
      const analysis = await this._analyzeOrFallback({
        title: metadata.title,
        content: body,
        wordCount,
        targetPublication: metadata.targetPublication
      });

      const blogPost = await BlogPost.create({
        title: metadata.title,
        slug,
        author: {
          type: 'human',
          name: 'John Stroh'
        },
        content: body,
        excerpt: metadata.subtitle || `${body.substring(0, 200)}...`,
        status: 'published', // Mark as published since these are completed articles
        moderation: {
          ai_analysis: JSON.stringify(analysis),
          human_reviewer: this.adminUser._id,
          review_notes: 'Imported from existing outreach materials',
          approved_at: new Date()
        },
        tractatus_classification: {
          quadrant: analysis.tractatus?.quadrant || 'OPERATIONAL',
          values_sensitive: analysis.tractatus?.valuesSensitive || false,
          requires_strategic_review: false
        },
        published_at: new Date(),
        tags: this._extractTags(analysis, metadata)
      });
      logger.info(` ✅ BlogPost created: ${blogPost._id}`);

      // Create SubmissionTracking only when the target publication is known
      // to the publication config.
      let submissionTracking = null;
      if (metadata.targetPublication) {
        const publication = publicationConfig.getPublicationById(metadata.targetPublication);
        if (publication) {
          submissionTracking = await SubmissionTracking.create({
            blogPostId: blogPost._id,
            publicationId: metadata.targetPublication,
            publicationName: publication.name,
            title: metadata.title,
            wordCount,
            contentType: metadata.contentType || this._mapPublicationType(publication.type),
            status: 'ready', // These are ready to submit
            submissionMethod: metadata.submissionEmail ? 'email' : 'form',
            submissionEmail: metadata.submissionEmail,
            editorContact: metadata.primaryContact,
            expectedResponseDays: publication.requirements?.responseTime || null,
            notes: [{
              content: `Imported from ${filename}. Original metadata preserved.`,
              author: this.adminUser._id,
              createdAt: new Date()
            }],
            createdBy: this.adminUser._id,
            lastUpdatedBy: this.adminUser._id
          });
          logger.info(` ✅ SubmissionTracking created: ${submissionTracking._id}`);
        }
      }

      // Log analysis summary.
      logger.info(` 📊 Analysis Summary:`);
      logger.info(` Tone: ${analysis.tone?.primary} (${Math.round((analysis.tone?.confidence || 0) * 100)}%)`);
      logger.info(` Audience: ${analysis.audience?.primary} (${Math.round((analysis.audience?.confidence || 0) * 100)}%)`);
      if (metadata.targetPublication && analysis.publicationFit) {
        logger.info(` Publication Fit: ${analysis.publicationFit.score}/10`);
        if (analysis.publicationFit.score < 7) {
          logger.warn(` ⚠️ Low fit score - recommendations:`);
          analysis.publicationFit.recommendations?.forEach((rec) => {
            logger.warn(` - ${rec}`);
          });
        }
      }

      this.stats.imported++;
      return {
        blogPost,
        submissionTracking,
        analysis
      };
    } catch (error) {
      logger.error(` ❌ Error importing ${path.basename(filePath)}:`, error);
      this.stats.errors++;
      return null;
    }
  }

  /**
   * Import all articles from directory.
   *
   * Only `.md` files are considered, and files whose names contain one of the
   * excluded keywords (strategy, instructions, ... documents) are ignored.
   * Files are imported one at a time.
   *
   * @param {string} dirPath - Directory to scan (not recursive).
   * @returns {Promise<object[]>} One entry per successfully imported file:
   *   `{ file, blogPost, submissionTracking, analysis }`.
   */
  async importDirectory(dirPath) {
    logger.info(`\n🚀 Starting import from: ${dirPath}\n`);
    const excludedKeywords = [
      'STRATEGY',
      'INSTRUCTIONS',
      'SUMMARY',
      'ANALYSIS',
      'RECRUITMENT',
      'PRESENTATION'
    ];
    const files = await fs.readdir(dirPath);
    const markdownFiles = files.filter((name) =>
      name.endsWith('.md') && !excludedKeywords.some((keyword) => name.includes(keyword))
    );
    logger.info(`Found ${markdownFiles.length} article files to process\n`);

    const results = [];
    for (const file of markdownFiles) {
      this.stats.processed++;
      const result = await this.importArticle(path.join(dirPath, file));
      if (result) {
        results.push({ file, ...result });
      }
    }
    return results;
  }

  /**
   * Extract tags from analysis and metadata.
   *
   * Always includes 'imported' and 'outreach', then the target publication
   * and content type when set, then up to the first three analysis themes.
   *
   * NOTE(review): the analyzer's theme shape is not visible from this file.
   * Objects with a string `theme` property (what this method has always read)
   * and plain strings are accepted; anything else is skipped instead of
   * throwing and failing the whole import.
   *
   * @param {object} analysis - Analysis result; `themes` is optional.
   * @param {object} metadata - Parsed article metadata.
   * @returns {string[]} De-duplicated tags in insertion order.
   */
  _extractTags(analysis, metadata) {
    const tags = ['imported', 'outreach'];
    if (metadata.targetPublication) {
      tags.push(metadata.targetPublication);
    }
    if (metadata.contentType) {
      tags.push(metadata.contentType);
    }

    const themes = Array.isArray(analysis?.themes) ? analysis.themes : [];
    for (const entry of themes.slice(0, 3)) {
      const label = typeof entry === 'string' ? entry : entry?.theme;
      if (typeof label === 'string' && label.trim() !== '') {
        tags.push(label.toLowerCase().replace(/\s+/g, '-'));
      }
    }
    return [...new Set(tags)]; // Remove duplicates
  }

  /**
   * Map publication type to content type.
   *
   * Known types map to themselves; everything else becomes 'essay'. A Set is
   * used so inherited object keys such as 'toString' cannot leak through.
   *
   * @param {string} pubType
   * @returns {string} One of 'letter', 'oped', 'essay', 'social'.
   */
  _mapPublicationType(pubType) {
    const knownTypes = new Set(['letter', 'oped', 'essay', 'social']);
    return knownTypes.has(pubType) ? pubType : 'essay';
  }

  /**
   * Print summary of the run: counters from `this.stats` followed by one
   * entry per imported article.
   *
   * @param {object[]} results - Return value of importDirectory().
   */
  printSummary(results) {
    const rule = '='.repeat(60);
    logger.info(`\n${rule}`);
    logger.info(` IMPORT SUMMARY`);
    logger.info(`${rule}\n`);
    logger.info(` Processed: ${this.stats.processed}`);
    logger.info(` Imported: ${this.stats.imported}`);
    logger.info(` Skipped: ${this.stats.skipped}`);
    logger.info(` Errors: ${this.stats.errors}\n`);

    if (results.length > 0) {
      logger.info(` Imported Articles:\n`);
      for (const result of results) {
        logger.info(`${result.blogPost.title}`);
        logger.info(` - BlogPost ID: ${result.blogPost._id}`);
        if (result.submissionTracking) {
          logger.info(` - Submission ID: ${result.submissionTracking._id}`);
          logger.info(` - Target: ${result.submissionTracking.publicationName}`);
        }
        if (result.analysis && result.analysis.publicationFit) {
          logger.info(` - Fit Score: ${result.analysis.publicationFit.score}/10`);
        }
        logger.info('');
      }
    }
    logger.info(`${rule}\n`);
  }

  /**
   * Close connections.
   *
   * @returns {Promise<void>}
   */
  async close() {
    await mongoose.connection.close();
    logger.info('✅ Disconnected from MongoDB');
  }
}
// Main execution
/**
 * Script entry point: connect to MongoDB, initialize the importer, import
 * every article under ../docs/outreach (relative to this script), print the
 * summary, then exit the process.
 *
 * The connection is closed exactly once, in `finally`, and a failure while
 * closing is logged rather than escaping as an unhandled rejection.
 * Exit code is 0 on success and 1 if the import or the close failed.
 *
 * @returns {Promise<void>} Does not normally resolve: the process exits first.
 */
async function main() {
  const importer = new ArticleImporter();
  let exitCode = 0;
  try {
    // Connect to database
    await importer.connect();
    // Initialize services
    await importer.initialize();
    // Import articles
    const outreachDir = path.join(__dirname, '../docs/outreach');
    const results = await importer.importDirectory(outreachDir);
    // Print summary
    importer.printSummary(results);
  } catch (error) {
    logger.error('Fatal error:', error);
    exitCode = 1;
  } finally {
    // Close connections; guard the close so a failure here cannot skip process.exit.
    try {
      await importer.close();
    } catch (closeError) {
      logger.error('Error closing MongoDB connection:', closeError);
      exitCode = 1;
    }
  }
  process.exit(exitCode);
}
// Run if called directly
if (require.main === module) {
  // main() returns a promise; attach a handler so a rejection is logged and
  // the process exits non-zero instead of surfacing as an unhandled rejection.
  main().catch((error) => {
    logger.error('Fatal error:', error);
    process.exit(1);
  });
}
module.exports = ArticleImporter;