tractatus/scripts/add-sections-from-db-markdown.js

#!/usr/bin/env node
/**
 * Add Card View Sections to Documents (Using DB Markdown)
 *
 * Generates sections from the content_markdown field stored in the database
 * for documents that don't have corresponding MD files on disk.
 */

require('dotenv').config();

const { connect, close } = require('../src/utils/db.util');
const Document = require('../src/models/Document.model');
const { marked } = require('marked');

// Slugs of the documents this script processes, grouped by the batch they
// came from. main() walks this list in order and looks each slug up via
// Document.findBySlug(); documents that already have sections are skipped.
const SLUGS_NEEDING_SECTIONS = [
  // 5 newly imported archives
  'case-studies-real-world-llm-failure-modes-appendix',
  'implementation-guide-python-examples',
  'tractatus-framework-enforcement-claude-code',
  'research-topic-concurrent-session-architecture',
  'research-topic-rule-proliferation-transactional-overhead',

  // 5 technical reference docs
  'implementation-roadmap-24-month-deployment-plan',
  'api-reference-complete',
  'api-javascript-examples',
  'api-python-examples',
  'openapi-specification',

  // 5 case studies
  'the-27027-incident-a-case-study-in-pattern-recognition-bias',
  'when-frameworks-fail-and-why-thats-ok',
  'our-framework-in-action-detecting-and-correcting-ai-fabrications',
  'real-world-ai-governance-a-case-study-in-framework-failure-and-recovery',
  'case-studies-real-world-llm-failure-modes',

  // 2 Phase 5 PoC summaries
  'phase-5-poc-session-1-summary',
  'phase-5-poc-session-2-summary'
];

/**
 * Split a markdown document into sections delimited by H2 headers (`## Title`).
 *
 * - Anything before the first H2 header is discarded.
 * - Both LF and CRLF line endings are accepted; section content is re-joined
 *   with LF and trimmed.
 * - Lines inside fenced code blocks (``` or ~~~) are always treated as
 *   content, so a `## comment` line in a code sample does not start a section.
 *
 * @param {string} markdown - Raw markdown source.
 * @returns {Array<{title: string, content_md: string}>} Sections in document
 *   order. Empty when the input is empty, not a string, or has no H2 header.
 */
function extractSectionsFromMarkdown(markdown) {
  if (typeof markdown !== 'string' || markdown === '') {
    return [];
  }

  const sections = [];
  let currentSection = null;
  let contentBuffer = [];
  // Marker (e.g. "```" or "~~~~") of the fence we are currently inside, or null.
  let openFence = null;

  const flushCurrentSection = () => {
    if (currentSection) {
      currentSection.content_md = contentBuffer.join('\n').trim();
      sections.push(currentSection);
    }
  };

  // Split on \r?\n: with a trailing "\r" left on the line, the header regex
  // below could never match and CRLF documents produced no sections at all.
  for (const line of markdown.split(/\r?\n/)) {
    const fenceMatch = line.match(/^ {0,3}(`{3,}|~{3,})(.*)$/);
    if (fenceMatch) {
      const [, marker, rest] = fenceMatch;
      if (openFence === null) {
        openFence = marker;
      } else if (
        marker[0] === openFence[0] &&
        marker.length >= openFence.length &&
        rest.trim() === ''
      ) {
        // A closing fence uses the same character, is at least as long as the
        // opening one, and carries no info string.
        openFence = null;
      }
    }

    const h2Match =
      !fenceMatch && openFence === null ? line.match(/^## (.+)$/) : null;

    if (h2Match) {
      flushCurrentSection();
      currentSection = {
        title: h2Match[1].trim(),
        content_md: ''
      };
      contentBuffer = [];
      continue;
    }

    // Lines before the first H2 have no section to belong to and are dropped.
    if (currentSection) {
      contentBuffer.push(line);
    }
  }

  flushCurrentSection();

  return sections;
}

/**
 * Produce a short plain-text excerpt from a markdown fragment.
 *
 * Common inline/block markers (heading hashes, bold, italics, links, inline
 * code, list bullets and numbers) are stripped and blank-line gaps collapse
 * to a single space. Text longer than `maxLength` is cut at `maxLength`; if a
 * period falls in the last 30% of the cut text the excerpt ends there,
 * otherwise "..." is appended.
 *
 * @param {string} markdown - Markdown source of the section.
 * @param {number} [maxLength=150] - Cut-off length before any "..." suffix.
 * @returns {string} The excerpt.
 */
function generateExcerpt(markdown, maxLength = 150) {
  // Ordered [pattern, replacement] pairs; order matters (bold before italics).
  const strippers = [
    [/^#+\s+/gm, ''],
    [/\*\*(.+?)\*\*/g, '$1'],
    [/\*(.+?)\*/g, '$1'],
    [/\[(.+?)\]\(.+?\)/g, '$1'],
    [/`(.+?)`/g, '$1'],
    [/^[-*+]\s+/gm, ''],
    [/^\d+\.\s+/gm, ''],
    [/\n{2,}/g, ' ']
  ];

  const plain = strippers
    .reduce((acc, [pattern, replacement]) => acc.replace(pattern, replacement), markdown)
    .trim();

  if (plain.length <= maxLength) {
    return plain;
  }

  const clipped = plain.substring(0, maxLength).trim();
  const sentenceEnd = clipped.lastIndexOf('.');

  // Prefer ending on a full sentence when that keeps most of the excerpt.
  return sentenceEnd > maxLength * 0.7
    ? clipped.substring(0, sentenceEnd + 1)
    : `${clipped}...`;
}

/**
 * Estimate how many whole minutes a piece of text takes to read.
 *
 * Words are whitespace-separated tokens. Leading/trailing whitespace is
 * ignored so it cannot inflate the count (splitting untrimmed text yields
 * empty tokens at the ends).
 *
 * @param {string} text - Text to measure.
 * @param {number} [wordsPerMinute=200] - Assumed reading speed.
 * @returns {number} Minutes, rounded up, never less than 1.
 */
function estimateReadingTime(text, wordsPerMinute = 200) {
  const words = text.trim().split(/\s+/).filter(Boolean);
  const minutes = Math.ceil(words.length / wordsPerMinute);
  return Math.max(1, minutes);
}

/**
 * Assign a card-view category to a section using keyword heuristics.
 *
 * Rules are checked in priority order and the first match wins:
 * critical, reference, technical, practical, then the fallback "conceptual".
 * Title keywords are matched as case-insensitive substrings. Code keywords in
 * the content (function/class/const/import) are matched as whole words so
 * ordinary prose such as "important" or "classification" does not mark a
 * section as technical.
 *
 * @param {string} title - Section title.
 * @param {string} content - Section markdown.
 * @returns {'critical'|'reference'|'technical'|'practical'|'conceptual'}
 */
function classifySection(title, content) {
  const titleLower = title.toLowerCase();
  const titleHasAny = (keywords) =>
    keywords.some((keyword) => titleLower.includes(keyword));

  if (
    titleHasAny(['limitation', 'failure', 'warning', 'security', 'risk']) ||
    // Warning sign, with or without the emoji variation selector.
    /\u26A0\uFE0F?|critical|warning|caution|danger/i.test(content)
  ) {
    return 'critical';
  }

  if (
    titleHasAny(['glossary', 'reference', 'contact', 'license', 'getting started'])
  ) {
    return 'reference';
  }

  if (
    titleHasAny(['technical', 'architecture', 'implementation', 'integration', 'api']) ||
    // Code fences, inline code identifiers, or whole-word code keywords.
    /```|`[a-z]+`|\b(?:function|class|const|import)\b/i.test(content)
  ) {
    return 'technical';
  }

  if (
    titleHasAny([
      'how',
      'guide',
      'tutorial',
      'example',
      'use case',
      'should use',
      'contributing'
    ])
  ) {
    return 'practical';
  }

  return 'conceptual';
}

/**
 * Rate a section's technical level from its markdown content.
 *
 * "advanced" when the content holds a fenced code block or mentions any of
 * api / implementation / integration / architecture; otherwise
 * "intermediate" when it mentions service / component / system / framework;
 * otherwise "beginner". Term checks are case-insensitive substring matches.
 *
 * @param {string} content - Section markdown.
 * @returns {'advanced'|'intermediate'|'beginner'}
 */
function determineTechnicalLevel(content) {
  const lowered = content.toLowerCase();
  const mentionsAny = (terms) => terms.some((term) => lowered.includes(term));

  const hasFencedCode = /```[\s\S]+```/.test(content);
  const advancedTerms = ['api', 'implementation', 'integration', 'architecture'];
  if (hasFencedCode || mentionsAny(advancedTerms)) {
    return 'advanced';
  }

  const intermediateTerms = ['service', 'component', 'system', 'framework'];
  return mentionsAny(intermediateTerms) ? 'intermediate' : 'beginner';
}

/**
 * Turn a section title into a URL-friendly slug.
 *
 * The title is lower-cased, every character other than a-z, 0-9, whitespace
 * and hyphen is dropped, and the remaining words are joined with single
 * hyphens (no leading or trailing hyphen).
 *
 * @param {string} title - Section title.
 * @returns {string} The slug; empty when the title has no a-z/0-9 characters.
 */
function generateSlug(title) {
  const cleaned = title.toLowerCase().replace(/[^a-z0-9\s-]/g, '');

  // Any run of whitespace and/or hyphens is a single word separator.
  const words = cleaned.split(/[\s-]+/).filter((word) => word.length > 0);

  return words.join('-');
}

/**
 * Generate card-view sections for one document from its stored markdown and
 * save them on the document.
 *
 * The document is looked up by slug. Nothing is written when it is missing,
 * already has sections, has no `content_markdown`, or contains no H2 headers.
 * H2 sections whose body is empty are left out, and each kept section keeps
 * the 1-based position of its H2 header as `number`.
 *
 * @param {string} slug - Slug of the document to process.
 * @returns {Promise<{success: boolean, reason?: string, sections?: number, error?: string}>}
 *   `success: true` with the count of saved sections, or `success: false`
 *   with a `reason` ('not_found' | 'has_sections' | 'no_markdown' | 'no_h2' |
 *   'update_failed'), or with `error` holding the message of a caught exception.
 */
async function addSectionsToDocument(slug) {
  console.log(`\n📄 Processing: ${slug}`);

  // Log why the document is being left alone and build the result object.
  const bail = (message, reason) => {
    console.log(message);
    return { success: false, reason };
  };

  try {
    const doc = await Document.findBySlug(slug);

    if (!doc) {
      return bail(`   ❌ Document not found`, 'not_found');
    }
    if (doc.sections && doc.sections.length > 0) {
      return bail(`   ⏭️  Already has ${doc.sections.length} sections`, 'has_sections');
    }
    if (!doc.content_markdown) {
      return bail(`   ❌ No content_markdown field`, 'no_markdown');
    }

    const rawSections = extractSectionsFromMarkdown(doc.content_markdown);
    if (rawSections.length === 0) {
      return bail(`   ⚠️  No H2 sections found in markdown`, 'no_h2');
    }

    console.log(`   📝 Found ${rawSections.length} sections`);

    const sections = rawSections
      // Remember each header's original position before dropping empty ones.
      .map((raw, index) => ({ raw, number: index + 1 }))
      .filter(({ raw }) => raw.content_md.trim())
      .map(({ raw, number }) => {
        const content_html = marked(raw.content_md);
        const excerpt = generateExcerpt(raw.content_md);
        const readingTime = estimateReadingTime(raw.content_md);
        const category = classifySection(raw.title, raw.content_md);
        const technicalLevel = determineTechnicalLevel(raw.content_md);
        const sectionSlug = generateSlug(raw.title);

        return {
          number,
          title: raw.title,
          slug: sectionSlug,
          content_html,
          excerpt,
          readingTime,
          technicalLevel,
          category
        };
      });

    const updated = await Document.update(doc._id.toString(), { sections });
    if (!updated) {
      return bail(`   ❌ Failed to update`, 'update_failed');
    }

    console.log(`   ✅ Added ${sections.length} sections`);
    for (const section of sections) {
      console.log(`      ${section.number}. ${section.title} (${section.category}, ${section.readingTime}min)`);
    }

    return { success: true, sections: sections.length };
  } catch (error) {
    console.error(`   ❌ Error: ${error.message}`);
    return { success: false, error: error.message };
  }
}

/**
 * Script entry point: connect to the database, run addSectionsToDocument()
 * for every slug in SLUGS_NEEDING_SECTIONS, and print a summary.
 *
 * Results are tallied as added, skipped (already had sections), no-H2, or
 * failed (every other outcome). The database connection is closed on both the
 * success and the error path; on any fatal error the process exits with
 * status 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  let connected = false;
  let exitCode = 0;

  try {
    // Derive the count from the list so the banner cannot drift out of date.
    console.log(`🚀 Adding Card View Sections to ${SLUGS_NEEDING_SECTIONS.length} Documents\n`);
    console.log('═══════════════════════════════════════════════════\n');

    await connect();
    connected = true;

    let added = 0;
    let skipped = 0;
    let noH2 = 0;
    let failed = 0;

    // Documents are processed one at a time so the log output stays grouped.
    for (const slug of SLUGS_NEEDING_SECTIONS) {
      const result = await addSectionsToDocument(slug);

      if (result.success) {
        added++;
      } else if (result.reason === 'has_sections') {
        skipped++;
      } else if (result.reason === 'no_h2') {
        noH2++;
      } else {
        failed++;
      }
    }

    console.log('\n═══════════════════════════════════════════════════');
    console.log('\n📊 Summary:');
    console.log(`   ✅ Added sections: ${added}`);
    console.log(`   ⏭️  Skipped (already have sections): ${skipped}`);
    console.log(`   ⚠️  No H2 sections found: ${noH2}`);
    console.log(`   ❌ Failed: ${failed}`);
    console.log(`   📦 Total: ${SLUGS_NEEDING_SECTIONS.length}`);
  } catch (error) {
    console.error('\n❌ Fatal error:', error);
    exitCode = 1;
  } finally {
    // Release the connection even when the run failed part-way through.
    if (connected) {
      try {
        await close();
      } catch (closeError) {
        console.error('\n❌ Fatal error:', closeError);
        exitCode = 1;
      }
    }
  }

  if (exitCode !== 0) {
    process.exit(exitCode);
  }
}

// Run the script. main() catches its own errors, so the returned promise is
// intentionally not awaited or chained here.
main();