- Create Economist SubmissionTracking package correctly: * mainArticle = full blog post content * coverLetter = 216-word SIR— letter * Links to blog post via blogPostId - Archive 'Letter to The Economist' from blog posts (it's the cover letter) - Fix date display on article cards (use published_at) - Target publication already displaying via blue badge Database changes: - Make blogPostId optional in SubmissionTracking model - Economist package ID: 68fa85ae49d4900e7f2ecd83 - Le Monde package ID: 68fa2abd2e6acd5691932150 Next: Enhanced modal with tabs, validation, export 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
511 lines
16 KiB
JavaScript
Executable file
511 lines
16 KiB
JavaScript
Executable file
#!/usr/bin/env node
|
|
|
|
/**
|
|
* Import Outreach Articles Script
|
|
* Imports existing articles from docs/outreach/ into database
|
|
* Creates BlogPost and SubmissionTracking records
|
|
* Runs content analysis on each article
|
|
*/
|
|
|
|
const fs = require('fs').promises;
|
|
const path = require('path');
|
|
const mongoose = require('mongoose');
|
|
require('dotenv').config();
|
|
|
|
const BlogPost = require('../src/models/BlogPost.model');
|
|
const SubmissionTracking = require('../src/models/SubmissionTracking.model');
|
|
const User = require('../src/models/User.model');
|
|
const publicationConfig = require('../src/config/publication-targets.config');
|
|
const { getInstance: getContentAnalyzer } = require('../src/services/ContentAnalyzer.service');
|
|
const logger = require('../src/utils/logger.util');
|
|
|
|
// MongoDB connection
|
|
const MONGODB_URI = process.env.MONGODB_URI || 'mongodb://localhost:27017/tractatus_dev';
|
|
|
|
/**
 * Imports outreach articles stored as markdown files into the database.
 *
 * For each markdown file it parses the submission metadata and article body,
 * runs the content analyzer (best effort), creates a BlogPost and, when the
 * file name maps to a known target publication, a SubmissionTracking record.
 * Running totals are kept in `this.stats`.
 */
class ArticleImporter {
  constructor() {
    // Both are populated by initialize().
    this.contentAnalyzer = null;
    this.adminUser = null;

    // Counters reported by printSummary().
    this.stats = {
      processed: 0,
      imported: 0,
      skipped: 0,
      errors: 0
    };
  }

  /**
   * Connect to MongoDB using the module-level MONGODB_URI.
   *
   * @returns {Promise<void>}
   * @throws Rethrows the connection error after logging it.
   */
  async connect() {
    try {
      await mongoose.connect(MONGODB_URI);
      logger.info('✅ Connected to MongoDB');
    } catch (error) {
      logger.error('❌ MongoDB connection error:', error);
      throw error;
    }
  }

  /**
   * Initialize services and look up the admin user that is recorded as the
   * reviewer / creator of every imported record.
   *
   * @returns {Promise<void>}
   * @throws {Error} When no user with role 'admin' exists.
   */
  async initialize() {
    const users = await User.list();
    this.adminUser = users.find(u => u.role === 'admin');

    if (!this.adminUser) {
      throw new Error('No admin user found. Please create an admin user first.');
    }

    logger.info(`Using admin user: ${this.adminUser.email}`);

    this.contentAnalyzer = getContentAnalyzer();
  }

  /**
   * Parse a markdown file to extract submission metadata and the article body.
   *
   * The metadata section starts at a `**SUBMISSION METADATA**` line and ends
   * at the next `---` line; the body runs from there until a supporting
   * materials / submission notes / end-of-article marker.
   *
   * @param {string} filePath - Path to the markdown file.
   * @returns {Promise<{metadata: Object, body: string, filename: string}>}
   *   `filename` is the base name without the `.md` extension.
   */
  async parseMarkdownFile(filePath) {
    const content = await fs.readFile(filePath, 'utf-8');
    // Accept both LF and CRLF so no stray '\r' ends up in the stored body.
    const lines = content.split(/\r?\n/);
    const filename = path.basename(filePath, '.md');

    const metadata = {
      title: null,
      subtitle: null,
      wordCount: null,
      format: null,
      contentType: null,
      category: null,
      targetPublication: null,
      targetSection: null,
      primaryContact: null,
      submissionEmail: null,
      status: 'drafted'
    };

    // Metadata labels whose value is simply the rest of the line.
    const textFields = [
      ['**Title:**', 'title'], // overrides the markdown heading title
      ['**Subtitle:**', 'subtitle'],
      ['**Format:**', 'format'],
      ['**Category:**', 'category'],
      ['**Target Section:**', 'targetSection'],
      ['**Primary Contact:**', 'primaryContact'],
      ['**Submit to:**', 'submissionEmail']
    ];

    // Use a leading markdown heading as the default title.
    const firstLine = lines[0].trim();
    if (firstLine.startsWith('# ')) {
      metadata.title = firstLine.replace(/^#\s+/, '');
    }

    let bodyStart = 0;
    let inMetadata = false;

    for (let i = 0; i < lines.length; i++) {
      const line = lines[i].trim();

      if (line === '**SUBMISSION METADATA**' || line === '**SUBMISSION METADATA:**') {
        inMetadata = true;
        continue;
      }

      if (!inMetadata) {
        continue;
      }

      // The first separator after the metadata header ends the section.
      if (line === '---') {
        bodyStart = i + 1;
        break;
      }

      const textField = textFields.find(([label]) => line.startsWith(label));
      if (textField) {
        const [label, key] = textField;
        metadata[key] = line.slice(label.length).trim();
      } else if (line.startsWith('**Word Count:**')) {
        // Allow thousands separators ("1,050 words") and always parse base 10.
        const match = line.match(/\d[\d,]*/);
        metadata.wordCount = match
          ? Number.parseInt(match[0].replace(/,/g, ''), 10)
          : null;
      } else if (line.startsWith('**Contact:**')) {
        // Only used as a fallback when no explicit "Submit to" address was seen.
        const emailMatch = line.match(/([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/);
        if (emailMatch && !metadata.submissionEmail) {
          metadata.submissionEmail = emailMatch[1];
        }
      }
    }

    // Markers that end the main article (supporting materials, notes, ...).
    const stopMarkers = [
      '## SUPPORTING MATERIALS',
      '## PITCH LETTER',
      '## SUBMISSION NOTES',
      '**SUPPORTING MATERIALS',
      '**SUBMISSION STRATEGY',
      '**SUBMISSION NOTES',
      '**END OF ARTICLE**'
    ];

    const bodyLines = [];
    let startedContent = false;

    for (let i = bodyStart; i < lines.length; i++) {
      const line = lines[i];

      if (stopMarkers.some(marker => line.includes(marker))) {
        break;
      }

      // Skip separators and empty lines until we find real content.
      if (!startedContent) {
        const trimmed = line.trim();
        if (trimmed === '---' || trimmed === '') {
          continue;
        }
        startedContent = true;
      }

      bodyLines.push(line);
    }

    const body = bodyLines.join('\n').trim();

    // Debug logging for suspiciously short bodies (these get skipped on import).
    if (body.length < 100) {
      logger.warn(`Body extraction debug for ${filename}:`);
      logger.warn(` bodyStart: ${bodyStart}`);
      logger.warn(` bodyLines count: ${bodyLines.length}`);
      logger.warn(` body length: ${body.length}`);
      logger.warn(` First 5 bodyLines:`, bodyLines.slice(0, 5));
    }

    // Determine content type from the declared format.
    if (metadata.format) {
      const formatLower = metadata.format.toLowerCase();
      if (formatLower.includes('letter')) {
        metadata.contentType = 'letter';
      } else if (formatLower.includes('op-ed') || formatLower.includes('opinion')) {
        metadata.contentType = 'oped';
      } else if (formatLower.includes('essay') || formatLower.includes('article')) {
        metadata.contentType = 'essay';
      }
    }

    // Extract target publication from the file name.
    if (filename.includes('Economist')) {
      metadata.targetPublication = filename.includes('Letter') ? 'economist-letter' : 'economist-article';
    } else if (filename.includes('NYT')) {
      metadata.targetPublication = 'nyt-oped';
    }

    return {
      metadata,
      body,
      filename
    };
  }

  /**
   * Generate a URL slug from a title: lower-case, runs of characters outside
   * a-z / 0-9 collapsed to a single '-', leading and trailing '-' removed.
   *
   * @param {string} title
   * @returns {string}
   */
  generateSlug(title) {
    return title
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, '-')
      .replace(/^-+|-+$/g, '');
  }

  /**
   * Import a single article: parse it, analyze it, and create the BlogPost
   * (plus a SubmissionTracking record when a known target publication is set).
   *
   * Never throws: failures are logged and counted in `this.stats.errors`.
   *
   * @param {string} filePath - Path to the markdown file.
   * @returns {Promise<{blogPost: Object, submissionTracking: (Object|null), analysis: Object}|null>}
   *   The created records, or null when the file was skipped or failed.
   */
  async importArticle(filePath) {
    logger.info(`\n📄 Processing: ${path.basename(filePath)}`);

    try {
      const { metadata, body, filename } = await this.parseMarkdownFile(filePath);

      if (!metadata.title) {
        logger.warn(`⚠️ Skipping ${filename}: No title found`);
        this.stats.skipped++;
        return null;
      }

      if (!body || body.length < 100) {
        logger.warn(`⚠️ Skipping ${filename}: No content body found`);
        this.stats.skipped++;
        return null;
      }

      // Prefer the declared word count; otherwise count whitespace-separated tokens.
      const wordCount = metadata.wordCount || body.split(/\s+/).length;

      logger.info(` Title: ${metadata.title}`);
      logger.info(` Word Count: ${wordCount}`);
      logger.info(` Type: ${metadata.contentType || 'unknown'}`);
      logger.info(` Target: ${metadata.targetPublication || 'none'}`);

      // Check if already imported.
      const slug = this.generateSlug(metadata.title);
      const existing = await BlogPost.findBySlug(slug);

      if (existing) {
        logger.warn(` ⚠️ Already exists with slug: ${slug}`);
        this.stats.skipped++;
        return null;
      }

      // Run content analysis (optional - don't block import if analysis fails).
      logger.info(` 🔍 Running content analysis...`);
      let analysis = null;
      try {
        analysis = await this.contentAnalyzer.analyzeArticle({
          title: metadata.title,
          content: body,
          wordCount,
          targetPublication: metadata.targetPublication
        });
        if (!analysis) {
          logger.warn(` ⚠️ Content analysis returned no result`);
        }
      } catch (analysisError) {
        logger.warn(` ⚠️ Content analysis failed: ${analysisError.message}`);
        logger.warn(` ⚠️ Continuing with import without analysis...`);
      }

      // A missing analysis (thrown or empty) must not abort the import.
      if (!analysis) {
        analysis = {
          tone: { primary: 'unknown', confidence: 0 },
          audience: { primary: 'unknown', confidence: 0 },
          themes: [],
          tractatus: { frameworkAlignment: 0, quadrant: 'OPERATIONAL', valuesSensitive: false }
        };
      }

      // Only mark the fallback excerpt as truncated when it actually is.
      const excerpt = metadata.subtitle ||
        (body.length > 200 ? `${body.substring(0, 200)}...` : body);

      const blogPost = await BlogPost.create({
        title: metadata.title,
        slug: slug,
        author: {
          type: 'human',
          name: 'John Stroh'
        },
        content: body,
        excerpt,
        status: 'published', // Mark as published since these are completed articles
        moderation: {
          ai_analysis: JSON.stringify(analysis),
          human_reviewer: this.adminUser._id,
          review_notes: 'Imported from existing outreach materials',
          approved_at: new Date()
        },
        tractatus_classification: {
          quadrant: analysis.tractatus?.quadrant || 'OPERATIONAL',
          values_sensitive: analysis.tractatus?.valuesSensitive || false,
          requires_strategic_review: false
        },
        published_at: new Date(),
        tags: this._extractTags(analysis, metadata)
      });

      logger.info(` ✅ BlogPost created: ${blogPost._id}`);

      // Create SubmissionTracking if target publication specified.
      // NOTE(review): if this create() throws, the BlogPost above stays in the
      // database and a rerun skips the file because its slug already exists.
      let submissionTracking = null;
      if (metadata.targetPublication) {
        const publication = publicationConfig.getPublicationById(metadata.targetPublication);

        if (publication) {
          submissionTracking = await SubmissionTracking.create({
            blogPostId: blogPost._id,
            publicationId: metadata.targetPublication,
            publicationName: publication.name,
            title: metadata.title,
            wordCount,
            contentType: metadata.contentType || this._mapPublicationType(publication.type),
            status: 'ready', // These are ready to submit
            submissionMethod: metadata.submissionEmail ? 'email' : 'form',
            submissionEmail: metadata.submissionEmail,
            editorContact: metadata.primaryContact,
            expectedResponseDays: publication.requirements?.responseTime || null,
            notes: [{
              content: `Imported from ${filename}. Original metadata preserved.`,
              author: this.adminUser._id,
              createdAt: new Date()
            }],
            createdBy: this.adminUser._id,
            lastUpdatedBy: this.adminUser._id
          });

          logger.info(` ✅ SubmissionTracking created: ${submissionTracking._id}`);
        } else {
          // Make the missing tracking record visible instead of skipping silently.
          logger.warn(` ⚠️ Unknown publication "${metadata.targetPublication}" - no SubmissionTracking created`);
        }
      }

      // Log analysis summary.
      logger.info(` 📊 Analysis Summary:`);
      logger.info(` Tone: ${analysis.tone?.primary} (${Math.round((analysis.tone?.confidence || 0) * 100)}%)`);
      logger.info(` Audience: ${analysis.audience?.primary} (${Math.round((analysis.audience?.confidence || 0) * 100)}%)`);

      if (metadata.targetPublication && analysis.publicationFit) {
        logger.info(` Publication Fit: ${analysis.publicationFit.score}/10`);
        if (analysis.publicationFit.score < 7) {
          logger.warn(` ⚠️ Low fit score - recommendations:`);
          analysis.publicationFit.recommendations?.forEach(rec => {
            logger.warn(` - ${rec}`);
          });
        }
      }

      this.stats.imported++;

      return {
        blogPost,
        submissionTracking,
        analysis
      };
    } catch (error) {
      logger.error(` ❌ Error importing ${path.basename(filePath)}:`, error);
      this.stats.errors++;
      return null;
    }
  }

  /**
   * Import all article files from a directory, one at a time.
   *
   * Only `.md` files are considered, and files whose name contains one of the
   * planning-document keywords (STRATEGY, INSTRUCTIONS, ...) are ignored.
   *
   * @param {string} dirPath - Directory containing the markdown files.
   * @returns {Promise<Array<Object>>} One entry per imported article:
   *   `{ file, blogPost, submissionTracking, analysis }`.
   */
  async importDirectory(dirPath) {
    logger.info(`\n🚀 Starting import from: ${dirPath}\n`);

    const excludedKeywords = [
      'STRATEGY',
      'INSTRUCTIONS',
      'SUMMARY',
      'ANALYSIS',
      'RECRUITMENT',
      'PRESENTATION'
    ];

    const files = await fs.readdir(dirPath);
    const markdownFiles = files.filter(f =>
      f.endsWith('.md') && !excludedKeywords.some(keyword => f.includes(keyword))
    );

    logger.info(`Found ${markdownFiles.length} article files to process\n`);

    const results = [];

    // Sequential on purpose: each import logs a multi-line report.
    for (const file of markdownFiles) {
      this.stats.processed++;
      const result = await this.importArticle(path.join(dirPath, file));

      if (result) {
        results.push({ file, ...result });
      }
    }

    return results;
  }

  /**
   * Build the tag list for a blog post from analysis themes and metadata.
   *
   * Tolerates a missing analysis and accepts themes given either as plain
   * strings or as objects with a string `theme` property; anything else is
   * ignored. At most the first three themes are considered.
   *
   * @param {Object|null} analysis - Analyzer output (may lack `themes`).
   * @param {Object} metadata - Parsed metadata (targetPublication, contentType).
   * @returns {string[]} De-duplicated tags.
   */
  _extractTags(analysis, metadata) {
    const tags = ['imported', 'outreach'];

    if (metadata.targetPublication) {
      tags.push(metadata.targetPublication);
    }

    if (metadata.contentType) {
      tags.push(metadata.contentType);
    }

    const themes = Array.isArray(analysis?.themes) ? analysis.themes : [];
    for (const entry of themes.slice(0, 3)) {
      const label = typeof entry === 'string' ? entry : entry?.theme;
      if (typeof label !== 'string' || label.trim() === '') {
        continue;
      }
      tags.push(label.toLowerCase().replace(/\s+/g, '-'));
    }

    return [...new Set(tags)]; // Remove duplicates
  }

  /**
   * Map a publication type to a content type.
   *
   * @param {string} pubType - Publication type from the publication config.
   * @returns {string} The same value when it is a known content type,
   *   otherwise 'essay'.
   */
  _mapPublicationType(pubType) {
    // A list lookup avoids matching inherited Object.prototype keys
    // such as 'constructor'.
    const knownTypes = ['letter', 'oped', 'essay', 'social'];
    return knownTypes.includes(pubType) ? pubType : 'essay';
  }

  /**
   * Log the import counters and a per-article list of created records.
   *
   * @param {Array<Object>} results - Value returned by importDirectory().
   */
  printSummary(results) {
    const divider = '='.repeat(60);

    logger.info(`\n${divider}`);
    logger.info(` IMPORT SUMMARY`);
    logger.info(`${divider}\n`);

    logger.info(` Processed: ${this.stats.processed}`);
    logger.info(` Imported: ${this.stats.imported}`);
    logger.info(` Skipped: ${this.stats.skipped}`);
    logger.info(` Errors: ${this.stats.errors}\n`);

    if (results.length > 0) {
      logger.info(` Imported Articles:\n`);
      for (const result of results) {
        logger.info(` ✓ ${result.blogPost.title}`);
        logger.info(` - BlogPost ID: ${result.blogPost._id}`);
        if (result.submissionTracking) {
          logger.info(` - Submission ID: ${result.submissionTracking._id}`);
          logger.info(` - Target: ${result.submissionTracking.publicationName}`);
        }
        if (result.analysis && result.analysis.publicationFit) {
          logger.info(` - Fit Score: ${result.analysis.publicationFit.score}/10`);
        }
        logger.info('');
      }
    }

    logger.info(`${divider}\n`);
  }

  /**
   * Close the mongoose connection.
   *
   * @returns {Promise<void>}
   */
  async close() {
    await mongoose.connection.close();
    logger.info('✅ Disconnected from MongoDB');
  }
}
|
|
|
|
// Main execution
|
|
/**
 * Script entry point: connect, initialize, import docs/outreach, print the
 * summary, then always close the connection and exit.
 *
 * Exits with code 0 on success and 1 when any step (including closing the
 * connection) fails. Per-article failures are counted by the importer and do
 * not change the exit code.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const importer = new ArticleImporter();
  let exitCode = 0;

  try {
    // Connect to database
    await importer.connect();

    // Initialize services
    await importer.initialize();

    // Import articles
    const outreachDir = path.join(__dirname, '../docs/outreach');
    const results = await importer.importDirectory(outreachDir);

    // Print summary
    importer.printSummary(results);
  } catch (error) {
    logger.error('Fatal error:', error);
    exitCode = 1;
  } finally {
    // Close exactly once, and never let a failing close() prevent the exit
    // below or surface as an unhandled rejection.
    try {
      await importer.close();
    } catch (closeError) {
      logger.error('Fatal error:', closeError);
      exitCode = 1;
    }
  }

  process.exit(exitCode);
}
|
|
|
|
// Run if called directly (not when this file is required as a module).
if (require.main === module) {
  // Attach a handler so a rejection that escapes main() is reported and
  // turned into a failing exit code instead of an unhandled rejection.
  main().catch((error) => {
    logger.error('Fatal error:', error);
    process.exit(1);
  });
}

module.exports = ArticleImporter;
|