tractatus/scripts/generate-card-sections.js

#!/usr/bin/env node

/**
 * Generate Card Presentation Sections from Markdown Documents
 *
 * Parses markdown files and creates structured sections for card-based UI presentation.
 * Splits the document on H2 headers (H3 and deeper headings stay inside their
 * section's body), converts to HTML, generates excerpts, estimates reading time.
 *
 * Usage: node scripts/generate-card-sections.js <markdown-file> [--update-db]
 *
 * Example: node scripts/generate-card-sections.js introduction-to-the-tractatus-framework.md --update-db
 */

// Load environment variables
require('dotenv').config();

const fs = require('fs');
const path = require('path');
const { marked } = require('marked');

// Configuration
// Assumed average reading speed, in words per minute. estimateReadingTime()
// uses this as the divisor for a section's word count.
const WORDS_PER_MINUTE = 200; // Average reading speed

/**
 * Split a markdown document into sections, one per H2 header (`## Title`).
 *
 * - Lines that appear before the first H2 header are discarded.
 * - H3 and deeper headings are left inside the body of their enclosing section.
 * - Both LF and CRLF line endings are accepted.
 * - `## ` lines inside a fenced code block (``` or ~~~) are content, not
 *   section boundaries. An unclosed fence runs to the end of the document.
 *
 * @param {string} markdown - Raw markdown content
 * @returns {Array<{title: string, content_md: string, subsections: Array}>}
 *   Sections in document order. `content_md` is the trimmed body joined with
 *   '\n'. `subsections` is always an empty array; it is kept so the returned
 *   shape stays stable for existing callers.
 */
function extractSections(markdown) {
  // Split on \r?\n: with a bare '\n' split, CRLF input leaves a trailing '\r'
  // on every line, and the `$`-anchored header pattern below never matches.
  const lines = markdown.split(/\r?\n/);
  const sections = [];
  let currentSection = null;
  let contentBuffer = [];
  // Marker that opened the current fenced code block (e.g. '```'), or null.
  let openFence = null;

  // Finalize the section being collected (if any) and append it to the result.
  const flushCurrentSection = () => {
    if (currentSection) {
      currentSection.content_md = contentBuffer.join('\n').trim();
      sections.push(currentSection);
    }
  };

  for (const line of lines) {
    // Track fenced code blocks so shell/markdown comments such as
    // "## install" inside a fence are not mistaken for headers.
    const fenceMatch = line.match(/^ {0,3}(`{3,}|~{3,})/);
    if (fenceMatch) {
      const marker = fenceMatch[1];
      if (openFence === null) {
        openFence = marker;
      } else if (
        marker[0] === openFence[0] &&
        marker.length >= openFence.length &&
        line.slice(fenceMatch[0].length).trim() === ''
      ) {
        // A closing fence uses the same character, is at least as long as
        // the opening fence, and carries no info string.
        openFence = null;
      }
    }

    // Match H2 headers (## Title), but only outside fenced code
    const h2Match = openFence === null ? line.match(/^## (.+)$/) : null;
    if (h2Match) {
      flushCurrentSection();
      currentSection = {
        title: h2Match[1].trim(),
        content_md: '',
        subsections: []
      };
      contentBuffer = [];
      continue;
    }

    // Collect content for the current section; preamble lines are dropped
    if (currentSection) {
      contentBuffer.push(line);
    }
  }

  // Save final section
  flushCurrentSection();

  return sections;
}

/**
 * Build a short, single-line plain-text excerpt from markdown content.
 *
 * Common markdown syntax is stripped with regular expressions (headers, list
 * markers, bold/italic, links, images, inline code) and every run of
 * whitespace, including single newlines, is collapsed to one space.
 *
 * When the stripped text is longer than `maxLength` it is cut at `maxLength`
 * characters. If a period falls in the last 30% of that cut, the excerpt ends
 * at that period; otherwise '...' is appended, so the result can be up to
 * three characters longer than `maxLength`.
 *
 * @param {string} markdown - Markdown content
 * @param {number} maxLength - Maximum excerpt length before the ellipsis
 * @returns {string} Excerpt text
 */
function generateExcerpt(markdown, maxLength = 150) {
  // Remove markdown formatting
  let text = markdown
    .replace(/^#+\s+/gm, '') // Remove headers
    // List markers go BEFORE emphasis: otherwise the italic pattern pairs a
    // leading "* " bullet with the next "*" on the line and eats both.
    .replace(/^[ \t]*[-*+]\s+/gm, '') // Remove list markers (incl. nested)
    .replace(/^[ \t]*\d+\.\s+/gm, '') // Remove numbered lists
    .replace(/\*\*(.+?)\*\*/g, '$1') // Remove bold
    .replace(/\*(.+?)\*/g, '$1') // Remove italic
    .replace(/!?\[(.+?)\]\(.+?\)/g, '$1') // Links and images -> their text
    .replace(/`(.+?)`/g, '$1') // Remove inline code
    .replace(/\s+/g, ' ') // Collapse all whitespace so the excerpt is one line
    .trim();

  // Truncate to maxLength
  if (text.length > maxLength) {
    text = text.substring(0, maxLength).trim();
    // Prefer ending on a complete sentence when one ends close to the cut
    const lastPeriod = text.lastIndexOf('.');
    if (lastPeriod > maxLength * 0.7) {
      text = text.substring(0, lastPeriod + 1);
    } else {
      text += '...';
    }
  }

  return text;
}

/**
 * Estimate reading time based on word count.
 *
 * Words are maximal runs of non-whitespace characters, so leading/trailing
 * whitespace and empty input do not add phantom words.
 *
 * @param {string} text - Text content
 * @param {number} [wordsPerMinute=WORDS_PER_MINUTE] - Reading speed used as
 *   the divisor; expected to be a positive number
 * @returns {number} Reading time in whole minutes, rounded up, never below 1
 */
function estimateReadingTime(text, wordsPerMinute = WORDS_PER_MINUTE) {
  // match() returns null when there is no word at all (empty or blank text)
  const words = text.match(/\S+/g);
  const wordCount = words === null ? 0 : words.length;
  const minutes = Math.ceil(wordCount / wordsPerMinute);
  return Math.max(1, minutes); // Minimum 1 minute
}

/**
 * Classify a section into a presentation category using keyword heuristics.
 *
 * Rules are checked in this order and the first match wins:
 *   1. critical   - title mentions limitation/failure/warning/security/risk,
 *                   or the content contains a warning sign or
 *                   critical/warning/caution/danger (substring match)
 *   2. reference  - title mentions glossary/reference/contact/license/
 *                   getting started
 *   3. technical  - title mentions technical/architecture/implementation/
 *                   integration/api, or the content contains a code fence,
 *                   a backticked identifier, or one of the code keywords
 *                   function/class/const/import as a whole word
 *   4. practical  - title mentions how/guide/tutorial/example/use case/
 *                   should use/contributing
 *   5. conceptual - everything else
 *
 * Title checks are case-insensitive substring checks.
 *
 * @param {string} title - Section title
 * @param {string} content - Section content (markdown)
 * @returns {string} Category (conceptual|practical|technical|reference|critical)
 */
function classifySection(title, content) {
  const titleLower = title.toLowerCase();
  const titleHasAny = (keywords) =>
    keywords.some((keyword) => titleLower.includes(keyword));

  // Critical: Security, limitations, failures, warnings
  if (
    titleHasAny(['limitation', 'failure', 'warning', 'security', 'risk']) ||
    /⚠️|critical|warning|caution|danger/i.test(content)
  ) {
    return 'critical';
  }

  // Reference: Glossaries, definitions, specifications
  if (titleHasAny(['glossary', 'reference', 'contact', 'license', 'getting started'])) {
    return 'reference';
  }

  // Technical: Code, APIs, architecture, implementation details.
  // The code keywords are matched as whole words: as bare substrings they
  // fire on ordinary prose ("important", "constant", "classic",
  // "functionality") and label almost every section as technical.
  if (
    titleHasAny(['technical', 'architecture', 'implementation', 'integration', 'api']) ||
    /```|`[a-z]+`|\b(?:function|class|const|import)\b/i.test(content)
  ) {
    return 'technical';
  }

  // Practical: How-to, tutorials, guides, use cases
  if (
    titleHasAny([
      'how',
      'guide',
      'tutorial',
      'example',
      'use case',
      'should use',
      'contributing'
    ])
  ) {
    return 'practical';
  }

  // Default to conceptual: Theory, principles, explanations
  return 'conceptual';
}

/**
 * Rate how technical a section is from keywords in its content.
 *
 * Checks are case-insensitive substring checks, evaluated from the most to
 * the least technical level; the first level that matches is returned.
 *
 * @param {string} content - Section content
 * @returns {string} Technical level (beginner|intermediate|advanced)
 */
function determineTechnicalLevel(content) {
  const lowered = content.toLowerCase();
  const mentionsAny = (terms) => terms.some((term) => lowered.includes(term));

  // Advanced: a fenced code block, or talk of APIs / implementation /
  // integration / architecture
  const hasFencedCode = content.match(/```[\s\S]+```/g) !== null;
  const advancedTerms = ['api', 'implementation', 'integration', 'architecture'];
  if (hasFencedCode || mentionsAny(advancedTerms)) {
    return 'advanced';
  }

  // Intermediate: technical vocabulary without code
  const intermediateTerms = ['service', 'component', 'system', 'framework'];
  if (mentionsAny(intermediateTerms)) {
    return 'intermediate';
  }

  // Beginner: nothing above matched
  return 'beginner';
}

/**
 * Turn a section title into a URL-friendly slug.
 *
 * The title is lowercased, every character other than a-z, 0-9, whitespace
 * and '-' is dropped, whitespace runs become '-', repeated '-' are collapsed,
 * and a leading or trailing '-' is removed.
 *
 * @param {string} title - Section title
 * @returns {string} URL-friendly slug
 */
function generateSlug(title) {
  const lowered = title.toLowerCase();
  const allowedCharsOnly = lowered.replace(/[^a-z0-9\s-]/g, '');
  const hyphenated = allowedCharsOnly.replace(/\s+/g, '-');
  const collapsed = hyphenated.replace(/-+/g, '-');
  return collapsed.replace(/^-|-$/g, '');
}

/**
 * Read a markdown file and build one card-section object per H2 section.
 *
 * Sections whose body is empty are skipped. `number` is the section's
 * 1-based position among ALL extracted sections, so numbering has gaps when
 * an empty section is skipped. Progress is logged to stdout.
 *
 * @param {string} filePath - Path to markdown file
 * @returns {Promise<Array>} Section objects ready for MongoDB, each with
 *   number, title, slug, content_html, excerpt, readingTime, technicalLevel
 *   and category
 */
async function processMarkdownFile(filePath) {
  console.log(`\n📄 Processing: ${filePath}`);

  const source = fs.readFileSync(filePath, 'utf8');
  const rawSections = extractSections(source);
  console.log(`   Found ${rawSections.length} sections`);

  const cards = [];
  for (const [index, raw] of rawSections.entries()) {
    const body = raw.content_md;

    // Nothing to present for a header with no body
    if (!body.trim()) {
      continue;
    }

    const position = index + 1;
    const card = {
      number: position,
      title: raw.title,
      slug: generateSlug(raw.title),
      content_html: marked(body),
      excerpt: generateExcerpt(body),
      readingTime: estimateReadingTime(body),
      technicalLevel: determineTechnicalLevel(body),
      category: classifySection(raw.title, body)
    };
    cards.push(card);

    console.log(`   ${position}. ${card.title}`);
    console.log(`      Category: ${card.category} | Level: ${card.technicalLevel} | ${card.readingTime} min`);
  }

  return cards;
}

/**
 * Store generated sections on the document that matches `slug`.
 *
 * The Document model is required lazily, inside the function, so the module
 * is only loaded when a database update is actually requested. Every failure
 * (document not found, update rejected, thrown error) is logged to stderr
 * and reported through the return value rather than thrown.
 *
 * @param {string} slug - Document slug
 * @param {Array} sections - Array of section objects
 * @returns {Promise<boolean>} true when the document was updated, else false
 */
async function updateDatabase(slug, sections) {
  try {
    // Get Document model (uses MongoDB driver directly, not Mongoose)
    const Document = require('../src/models/Document.model.js');

    const existing = await Document.findBySlug(slug);
    if (existing) {
      const updated = await Document.update(existing._id.toString(), { sections });
      if (updated) {
        console.log(`   ✅ Updated document in MongoDB: ${existing.title}`);
        console.log(`   📊 Sections: ${sections.length}`);
        return true;
      }

      console.error(`   ❌ Failed to update document`);
      return false;
    }

    console.error(`   ❌ Document not found: ${slug}`);
    return false;
  } catch (error) {
    console.error(`   ❌ Database error: ${error.message}`);
    return false;
  }
}

/**
 * Main execution: parse CLI arguments, generate the card sections and either
 * print them as JSON or, with --update-db, store them in the database.
 *
 * The markdown file is the first argument that is not a `--flag`, so
 * `--update-db` may be given before or after the file path. With
 * --update-db the document slug is the file's base name without `.md`.
 *
 * Exits the process with status 1 when no file is given, when the file does
 * not exist, or when the database update fails.
 */
async function main() {
  const args = process.argv.slice(2);
  const updateDb = args.includes('--update-db');

  // Do not assume the file is args[0]: `script --update-db file.md` would
  // otherwise try to open a file literally named "--update-db".
  const markdownFile = args.find((arg) => !arg.startsWith('--'));

  if (markdownFile === undefined) {
    console.error('Usage: node scripts/generate-card-sections.js <markdown-file> [--update-db]');
    console.error('Example: node scripts/generate-card-sections.js introduction-to-the-tractatus-framework.md --update-db');
    process.exit(1);
  }

  if (!fs.existsSync(markdownFile)) {
    console.error(`❌ File not found: ${markdownFile}`);
    process.exit(1);
  }

  // Generate sections
  const sections = await processMarkdownFile(markdownFile);

  // Output JSON
  console.log(`\n📦 Generated ${sections.length} sections\n`);

  if (!updateDb) {
    console.log(JSON.stringify(sections, null, 2));
    console.log('\n💡 To update database, add --update-db flag');
  } else {
    // Extract slug from filename
    const slug = path.basename(markdownFile, '.md');
    const success = await updateDatabase(slug, sections);

    if (success) {
      console.log(`\n✅ Card presentation sections updated successfully!`);
    } else {
      console.log(`\n❌ Failed to update database`);
      process.exit(1);
    }
  }
}

// Run main() only when this file is executed directly (e.g. `node <file>`),
// not when it is loaded with require() from another module.
if (require.main === module) {
  main().catch(error => {
    // Any rejection that escapes main() is logged and becomes exit status 1
    console.error('Fatal error:', error);
    process.exit(1);
  });
}

// Functions exposed to other modules; the other helpers in this file are not exported.
module.exports = { processMarkdownFile, extractSections, classifySection };