tractatus/src/utils/document-section-parser.js
TheFlow 7f6192cbd6 refactor(lint): fix code style and unused variables across src/
- Fixed unused function parameters by prefixing with underscore
- Removed unused imports and variables
- Applied eslint --fix for automatic style fixes
  - Property shorthand
  - String template literals
  - Prefer const over let where appropriate
  - Spacing and formatting

Reduces lint errors from 108+ to 78 (61 unused vars, 17 other issues)

Related to CI lint failures in previous commit

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-24 20:15:26 +13:00

267 lines
7.5 KiB
JavaScript

/**
* Document Section Parser
* Analyzes markdown documents and creates card-based sections
*/
/**
 * Split a markdown document into card sections, one per H2 (`## `) heading.
 *
 * - Anything before the first H2 is ignored.
 * - H1 lines (`# Title`) are left out of section content.
 * - H3+ headings, paragraphs, lists, etc. stay inside the enclosing H2 section.
 * - Lines inside fenced code blocks (``` or ~~~) are always kept verbatim and
 *   are never interpreted as headings, so `# comment` lines in code samples
 *   are preserved and `## ...` lines in code do not start a new section.
 * - Both LF and CRLF line endings are accepted; section content is joined
 *   with `\n`.
 *
 * @param {string} markdown - Raw markdown source of the document.
 * @param {string} [contentHtml] - Currently unused; kept so existing callers
 *   that pass the rendered HTML keep working.
 * @returns {Array<{title: string, slug: string, level: number, content: string,
 *   excerpt: string, readingTime: number, technicalLevel: string,
 *   category: string}>} Sections in document order (empty when `markdown` is
 *   falsy or contains no H2 heading).
 */
function parseDocumentSections(markdown, contentHtml) {
  if (!markdown) return [];

  const sections = [];
  let currentSection = null;
  let sectionLines = [];
  // Marker (e.g. "```" or "~~~~") of the fenced code block we are inside, or null.
  let openFence = null;

  // Fill in the derived fields of the section being built and store it.
  const finalizeCurrentSection = () => {
    if (!currentSection) return;
    const content = sectionLines.join('\n').trim();
    currentSection.content = content;
    currentSection.excerpt = extractExcerpt(content);
    currentSection.readingTime = estimateReadingTime(content);
    currentSection.technicalLevel = detectTechnicalLevel(content);
    currentSection.category = categorizeSection(currentSection.title, content);
    sections.push(currentSection);
  };

  // Split on CRLF as well as LF: a trailing "\r" would otherwise stop the
  // heading patterns from matching ("." never matches "\r").
  for (const line of markdown.split(/\r?\n/)) {
    const fenceMatch = line.match(/^ {0,3}(`{3,}|~{3,})/);
    if (fenceMatch) {
      const marker = fenceMatch[1];
      if (openFence === null) {
        openFence = marker;
      } else if (
        marker[0] === openFence[0] &&
        marker.length >= openFence.length &&
        line.trim() === marker
      ) {
        // A closing fence uses the same character, is at least as long as
        // the opening one, and carries no info string.
        openFence = null;
      }
    }

    // The fence lines themselves count as code too.
    const isCodeLine = fenceMatch !== null || openFence !== null;

    if (!isCodeLine) {
      const h2Match = line.match(/^##\s+(.+)$/);
      if (h2Match) {
        finalizeCurrentSection();
        const title = h2Match[1].trim();
        currentSection = {
          title,
          slug: generateSlug(title),
          level: 2,
          content: '',
          excerpt: '',
          readingTime: 0,
          technicalLevel: 'basic',
          category: 'conceptual'
        };
        // The H2 heading itself is part of the section content.
        sectionLines = [line];
        continue;
      }

      // H1 is the document title, not section content.
      if (/^#\s+[^#]/.test(line)) continue;
    }

    if (currentSection) {
      sectionLines.push(line);
    }
  }

  finalizeCurrentSection();
  return sections;
}
/**
 * Build a short plain-text excerpt from markdown content.
 *
 * Common markdown syntax (headings, list markers, blockquotes, bold, italic,
 * inline code, links) is stripped, newlines are collapsed to spaces, and the
 * first two sentences are kept. Results longer than 150 characters are cut
 * to 147 characters plus "...".
 *
 * @param {string} content - Markdown text.
 * @returns {string} The excerpt, or '' when `content` is falsy.
 */
function extractExcerpt(content) {
  if (!content) return '';

  const text = content
    .replace(/^#+\s+/gm, '') // Remove headings
    // Line-prefix markers go first: a leading "* " bullet must be removed
    // before emphasis stripping, or it is taken for an italic opener.
    .replace(/^[-*]\s+/gm, '') // Remove list markers
    .replace(/^\d+\.\s+/gm, '') // Remove numbered lists
    .replace(/^>\s+/gm, '') // Remove blockquotes
    .replace(/\*\*(.+?)\*\*/g, '$1') // Remove bold
    .replace(/\*(.+?)\*/g, '$1') // Remove italic
    .replace(/`(.+?)`/g, '$1') // Remove code
    .replace(/\[(.+?)\]\(.+?\)/g, '$1') // Remove links
    .replace(/\n+/g, ' ') // Collapse newlines
    .trim();

  // Each match after the first starts with the whitespace that followed the
  // previous terminator, so trim before joining to avoid double spaces.
  const sentences = text.match(/[^.!?]+[.!?]+/g) || [text];
  let excerpt = sentences
    .slice(0, 2)
    .map((sentence) => sentence.trim())
    .join(' ');

  // Truncate to 150 chars if needed
  if (excerpt.length > 150) {
    excerpt = `${excerpt.substring(0, 147)}...`;
  }
  return excerpt;
}
/**
 * Estimate reading time in whole minutes, assuming 200 words per minute.
 *
 * Words are runs of non-whitespace characters, so leading/trailing whitespace
 * and blank lines do not inflate the count.
 *
 * @param {string} content - Text to measure.
 * @returns {number} Minutes, rounded up, never less than 1.
 */
function estimateReadingTime(content) {
  if (!content) return 1;
  // match() yields null (not an empty array) for whitespace-only content.
  const words = content.match(/\S+/g);
  const wordCount = words ? words.length : 0;
  const minutes = Math.ceil(wordCount / 200);
  return Math.max(1, minutes);
}
/**
 * Classify content as 'basic', 'intermediate' or 'advanced'.
 *
 * Two scores are computed from whole-word, case-insensitive term counts:
 * a technical score (general engineering vocabulary, plus 3 points per
 * fenced code block, counted as half the number of ``` markers) and an
 * advanced score (specialised vocabulary).
 *
 * @param {string} content - Markdown text to inspect.
 * @returns {string} 'advanced' when the advanced score is >= 3 or the
 *   technical score is >= 15; 'intermediate' when the technical score is
 *   >= 5; otherwise 'basic' (also for falsy content).
 */
function detectTechnicalLevel(content) {
  if (!content) {
    return 'basic';
  }

  const generalVocabulary = [
    'api', 'database', 'mongodb', 'algorithm', 'architecture',
    'implementation', 'node.js', 'javascript', 'typescript',
    'async', 'await', 'promise', 'class', 'function',
    'middleware', 'authentication', 'authorization', 'encryption',
    'hash', 'token', 'jwt', 'rest', 'graphql'
  ];
  const specialistVocabulary = [
    'metacognitive', 'stochastic', 'quadrant classification',
    'intersection observer', 'csp', 'security policy',
    'cross-reference validation', 'boundary enforcement',
    'architectural constraints', 'formal verification'
  ];

  const haystack = content.toLowerCase();

  // Total number of whole-word occurrences of every term in the list.
  const tally = (vocabulary) =>
    vocabulary.reduce((total, term) => {
      const hits = haystack.match(new RegExp(`\\b${term}\\b`, 'gi'));
      return hits ? total + hits.length : total;
    }, 0);

  // Every pair of ``` markers is one code block worth three points.
  const fenceMarkers = content.match(/```/g) || [];
  const technicalScore = tally(generalVocabulary) + (fenceMarkers.length / 2) * 3;
  const advancedScore = tally(specialistVocabulary);

  if (advancedScore >= 3 || technicalScore >= 15) {
    return 'advanced';
  }
  return technicalScore >= 5 ? 'intermediate' : 'basic';
}
/**
 * Assign a section to one of the categories 'conceptual', 'technical',
 * 'practical', 'reference' or 'critical'.
 *
 * The title decides first: the first category (in the order above) that has
 * a keyword occurring anywhere in the lower-cased title wins. Otherwise the
 * content is scored by whole-word keyword occurrences and the highest
 * scoring category is returned; on a tie the category listed later wins.
 * When nothing matches the result is 'conceptual'.
 *
 * @param {string} title - Section title; a missing title is treated as ''.
 * @param {string} content - Section markdown; missing content is treated as ''.
 * @returns {string} The category name.
 */
function categorizeSection(title, content) {
  // Tolerate missing arguments the same way the sibling helpers do,
  // instead of throwing on `.toLowerCase()`.
  const lowerTitle = String(title || '').toLowerCase();
  const lowerContent = String(content || '').toLowerCase();

  // Category keywords
  const categories = {
    conceptual: [
      'what is', 'introduction', 'overview', 'why', 'philosophy',
      'concept', 'theory', 'principle', 'background', 'motivation'
    ],
    technical: [
      'architecture', 'implementation', 'technical', 'code', 'api',
      'configuration', 'setup', 'installation', 'integration',
      'class', 'function', 'service', 'component'
    ],
    practical: [
      'quick start', 'tutorial', 'guide', 'how to', 'example',
      'walkthrough', 'getting started', 'usage', 'practice'
    ],
    reference: [
      'reference', 'api', 'specification', 'documentation',
      'glossary', 'terms', 'definitions', 'index'
    ],
    critical: [
      'security', 'warning', 'important', 'critical', 'boundary',
      'safety', 'risk', 'violation', 'error', 'failure'
    ]
  };
  const categoryEntries = Object.entries(categories);

  // Title match takes precedence over any content score.
  const titleHit = categoryEntries.find(([, keywords]) =>
    keywords.some((keyword) => lowerTitle.includes(keyword))
  );
  if (titleHit) return titleHit[0];

  // Score content by whole-word keyword occurrences.
  let bestCategory = 'conceptual';
  let bestScore = 0;
  for (const [category, keywords] of categoryEntries) {
    let score = 0;
    for (const keyword of keywords) {
      const matches = lowerContent.match(new RegExp(`\\b${keyword}\\b`, 'gi'));
      if (matches) score += matches.length;
    }
    // ">=" keeps the established tie-break: a later category with an equal
    // (non-zero) score replaces an earlier one.
    if (score > 0 && score >= bestScore) {
      bestCategory = category;
      bestScore = score;
    }
  }
  return bestCategory;
}
/**
 * Generate a URL-safe slug from a title.
 *
 * The title is lower-cased, characters other than word characters,
 * whitespace and hyphens are dropped, whitespace runs become single hyphens,
 * and the result never starts or ends with a hyphen.
 *
 * @param {string} title - Heading text; a missing title yields ''.
 * @returns {string} The slug, e.g. 'Getting Started!' -> 'getting-started'.
 */
function generateSlug(title) {
  return String(title || '')
    .toLowerCase()
    .replace(/[^\w\s-]/g, '')
    // Trim while the padding is still whitespace; once it has been turned
    // into hyphens a trim() has nothing left to remove.
    .trim()
    .replace(/\s+/g, '-')
    .replace(/-+/g, '-')
    .replace(/^-+|-+$/g, '');
}
// Public API (CommonJS): the section parser together with the helper
// functions it uses, each exported under its own name.
module.exports = {
parseDocumentSections,
extractExcerpt,
estimateReadingTime,
detectTechnicalLevel,
categorizeSection,
generateSlug
};