// tractatus/src/middleware/input-validation.middleware.js

/**
 * Input Validation Middleware - FULL COMPLIANCE (inst_043)
 * Comprehensive sanitization and validation for all user input
 *
 * Security Layers:
 * 1. Length limits (configurable, default 5000 chars)
 * 2. HTML sanitization using DOMPurify (sovereign JS)
 * 3. SQL/NoSQL injection prevention
 * 4. XSS prevention (CSP + output encoding)
 * 5. CSRF protection (see csrf-protection.middleware.js)
 * 6. Rate limiting (see rate-limit.middleware.js)
 */

const validator = require('validator');
const { logSecurityEvent, getClientIp } = require('../utils/security-logger');

// DOMPurify for server-side HTML sanitization.
// DOMPurify needs a DOM `window`; on the server one is supplied by jsdom.
// If either package cannot be loaded (or the window cannot be built),
// DOMPurify is set to null and sanitizeHTML() uses its regex fallback.
let DOMPurify;
try {
  const createDOMPurify = require('dompurify');
  const { JSDOM } = require('jsdom');
  const window = new JSDOM('').window;
  DOMPurify = createDOMPurify(window);
} catch (e) {
  // Include the failure reason so operators can tell a missing dependency
  // apart from a runtime failure while constructing the DOM window.
  console.warn(
    '[INPUT VALIDATION] DOMPurify not available, using basic sanitization',
    e?.message ?? e
  );
  DOMPurify = null;
}

/**
 * Maximum accepted `length` per schema field type (inst_043 Layer 1).
 *
 * The validation middleware looks a field's `type` up in this table when the
 * schema entry does not carry its own `maxLength`; unknown or missing types
 * fall back to `default`.
 */
const LENGTH_LIMITS = {
  email: 254,         // schema type 'email'
  url: 2_048,         // schema type 'url'
  phone: 20,          // schema type 'phone'
  name: 100,          // schema type 'name'
  title: 200,         // schema type 'title'
  description: 5_000, // schema type 'description'
  case_study: 50_000, // schema type 'case_study'
  default: 5_000      // used when the type has no entry of its own
};

/**
 * HTML sanitization using DOMPurify (inst_043 Layer 2)
 * Strips ALL HTML tags except safe whitelist for markdown fields
 *
 * When DOMPurify is unavailable a regex-based fallback is used instead. The
 * fallback always strips every tag (it ignores `allowMarkdown`) and also
 * removes `javascript:` sequences and `on<name>=` event-handler prefixes.
 *
 * @param {*} input - Value to sanitize; anything that is not a string yields ''.
 * @param {boolean} [allowMarkdown=false] - Keep the small markdown tag whitelist
 *   (DOMPurify path only).
 * @returns {string} The sanitized string.
 */
function sanitizeHTML(input, allowMarkdown = false) {
  if (typeof input !== 'string') return '';

  if (DOMPurify) {
    const config = allowMarkdown
      ? { ALLOWED_TAGS: ['p', 'br', 'strong', 'em', 'ul', 'ol', 'li', 'code', 'pre'] }
      : { ALLOWED_TAGS: [] }; // Strip all HTML
    return DOMPurify.sanitize(input, config);
  }

  // Fallback: Basic HTML sanitization.
  // A single replace pass can be defeated by nesting: removing the inner
  // match from "javajavascript:script:" re-creates "javascript:". Repeat the
  // pass until the text stops changing. Every pass only deletes characters,
  // so the loop is guaranteed to terminate.
  let cleaned = input;
  let previous;
  do {
    previous = cleaned;
    cleaned = previous
      .replace(/<[^>]*>/g, '')       // Remove HTML tags
      .replace(/javascript:/gi, '')  // Remove javascript: URLs
      .replace(/on\w+\s*=/gi, '');   // Remove event handlers
  } while (cleaned !== previous);

  return cleaned.trim();
}

// MongoDB query operator names that are flagged wherever they appear in a
// string (case-insensitive, no word boundary — so "$gte" is caught via "$gt").
const NOSQL_OPERATOR_PATTERN = /\$(?:where|ne|gt|lt|regex|or|and)/i;

/**
 * NoSQL injection prevention (inst_043 injection-prevention layer)
 * Heuristically detects MongoDB-style operator payloads in a string.
 *
 * A string is flagged when it either
 *   - contains one of the known operator names ($where, $ne, $gt, $lt,
 *     $regex, $or, $and), or
 *   - looks like a serialized object: after leading whitespace it starts
 *     with `{` and a `$` or `|` is followed somewhere later by `}`.
 *
 * The object-like check is done with linear index scans rather than a
 * `{.*[$|].*}` regex: `.` does not match line terminators, which let
 * multi-line payloads slip through, and the nested wildcards backtrack
 * quadratically on long inputs.
 *
 * @param {*} value - Value to inspect; non-strings are never flagged here.
 * @returns {boolean} true when the string looks like an injection attempt.
 */
function detectNoSQLInjection(value) {
  if (typeof value !== 'string') return false;

  if (NOSQL_OPERATOR_PATTERN.test(value)) return true;

  // Object-like structure with $ or |
  const candidate = value.trimStart();
  if (!candidate.startsWith('{')) return false;

  const markerIndex = candidate.search(/[$|]/);
  return markerIndex !== -1 && candidate.lastIndexOf('}') > markerIndex;
}

/**
 * Validate email format
 *
 * validator.isEmail() only accepts strings and throws a TypeError for any
 * other input, so non-string values are reported as invalid up front.
 *
 * @param {*} email - Candidate email address.
 * @returns {boolean} true only for a string that validator accepts as an email.
 */
function isValidEmail(email) {
  return typeof email === 'string' && validator.isEmail(email);
}

/**
 * Validate URL format
 *
 * The URL must include a protocol. validator.isURL() only accepts strings and
 * throws a TypeError for any other input, so non-string values are reported
 * as invalid up front.
 *
 * @param {*} url - Candidate URL.
 * @returns {boolean} true only for a string that validator accepts as a URL.
 */
function isValidURL(url) {
  return typeof url === 'string' && validator.isURL(url, { require_protocol: true });
}

/**
 * Find the first `$`-prefixed key anywhere inside an object or array.
 * Parsed request bodies can carry query operators as real objects
 * (e.g. `{ "$ne": null }`), which string-based detection never sees.
 *
 * @param {*} value - Value to inspect.
 * @returns {string|null} The offending key, or null when none is found
 *   (always null for primitives).
 */
function findOperatorKey(value) {
  if (value === null || typeof value !== 'object') return null;

  // Iterative walk so deeply nested bodies cannot overflow the call stack.
  const pending = [value];
  const visited = new Set();
  while (pending.length > 0) {
    const current = pending.pop();
    if (visited.has(current)) continue;
    visited.add(current);

    for (const [key, nested] of Object.entries(current)) {
      if (key.startsWith('$')) return key;
      if (nested !== null && typeof nested === 'object') pending.push(nested);
    }
  }
  return null;
}

/**
 * Create input validation middleware
 *
 * Each schema entry is keyed by body field name and may set:
 *   - required      reject the request when the field is missing
 *   - type          'email' / 'url' trigger format validation; the type also
 *                   selects the default length limit from LENGTH_LIMITS
 *   - maxLength     overrides the length limit for this field
 *   - allowMarkdown keep the markdown tag whitelist during HTML sanitization
 *
 * Per field the middleware checks presence, length, format and injection
 * patterns, then sanitizes string values. Any failure yields a 400 response
 * listing all errors; otherwise the sanitized values are merged over
 * `req.body`, `req.validationPassed` is set and `next()` is called.
 * Unexpected exceptions yield a 500 response.
 *
 * @param {Object} schema - Field name → validation config.
 * @returns {Function} Async Express-style middleware `(req, res, next)`.
 */
function createInputValidationMiddleware(schema) {
  return async (req, res, next) => {
    const clientIp = getClientIp(req);
    const errors = [];
    const sanitized = {};

    try {
      // A request without a parsed body is treated as an empty one so that
      // required fields are reported with a 400 instead of crashing.
      const body = req.body ?? {};

      for (const [field, config] of Object.entries(schema)) {
        const value = body[field];

        // Any falsy value ('', 0, false, null, undefined) counts as "not
        // provided": an error for required fields, skipped for optional ones.
        if (!value) {
          if (config.required) {
            errors.push(`${field} is required`);
          }
          continue;
        }

        // Length validation
        const maxLength = config.maxLength || LENGTH_LIMITS[config.type] || LENGTH_LIMITS.default;
        if (value.length > maxLength) {
          errors.push(`${field} exceeds maximum length of ${maxLength} characters`);
          continue;
        }

        // Type-specific validation. Non-string values can never be a valid
        // email/URL and must not reach the validators, which throw on them.
        const isString = typeof value === 'string';

        if (config.type === 'email' && (!isString || !isValidEmail(value))) {
          errors.push(`${field} must be a valid email address`);
          continue;
        }

        if (config.type === 'url' && (!isString || !isValidURL(value))) {
          errors.push(`${field} must be a valid URL`);
          continue;
        }

        // NoSQL injection detection: strings are checked against the
        // operator patterns, objects/arrays for `$`-prefixed keys.
        let suspicious = null;
        if (isString) {
          suspicious = detectNoSQLInjection(value) ? value : null;
        } else {
          suspicious = findOperatorKey(value);
        }

        if (suspicious !== null) {
          await logSecurityEvent({
            type: 'nosql_injection_attempt',
            sourceIp: clientIp,
            userId: req.user?.id,
            endpoint: req.path,
            userAgent: req.get('user-agent'),
            details: {
              field,
              pattern: suspicious.substring(0, 100)
            },
            action: 'blocked',
            severity: 'critical'
          });

          errors.push(`${field} contains invalid characters`);
          continue;
        }

        // HTML sanitization (inst_043 Layer 2)
        if (isString) {
          const allowMarkdown = config.allowMarkdown || false;
          sanitized[field] = sanitizeHTML(value, allowMarkdown);

          // Log if sanitization changed the input (potential XSS attempt)
          if (sanitized[field] !== value) {
            await logSecurityEvent({
              type: 'input_sanitized',
              sourceIp: clientIp,
              userId: req.user?.id,
              endpoint: req.path,
              userAgent: req.get('user-agent'),
              details: {
                field,
                original_length: value.length,
                sanitized_length: sanitized[field].length
              },
              action: 'sanitized',
              severity: 'low'
            });
          }
        } else {
          sanitized[field] = value;
        }
      }

      // If validation errors, reject request
      if (errors.length > 0) {
        await logSecurityEvent({
          type: 'input_validation_failure',
          sourceIp: clientIp,
          userId: req.user?.id,
          endpoint: req.path,
          userAgent: req.get('user-agent'),
          details: {
            errors,
            fields: Object.keys(schema)
          },
          action: 'rejected',
          severity: 'medium'
        });

        return res.status(400).json({
          error: 'Validation failed',
          details: errors
        });
      }

      // Overlay sanitized values on the body; fields that are not in the
      // schema are carried over untouched.
      req.body = { ...req.body, ...sanitized };
      req.validationPassed = true;

      next();

    } catch (error) {
      console.error('[INPUT VALIDATION ERROR]', error);

      // The failure may itself come from the security logger; never let a
      // second logging failure prevent the client from getting a response.
      try {
        await logSecurityEvent({
          type: 'input_validation_error',
          sourceIp: clientIp,
          userId: req.user?.id,
          endpoint: req.path,
          userAgent: req.get('user-agent'),
          details: {
            error: error.message
          },
          action: 'rejected',
          severity: 'high'
        });
      } catch (logError) {
        console.error('[INPUT VALIDATION ERROR] Failed to log security event', logError);
      }

      return res.status(500).json({
        error: 'Validation failed',
        message: 'An error occurred during input validation'
      });
    }
  };
}

// Public API: the middleware factory plus the individual validation and
// sanitization helpers and the length-limit table it is built from.
module.exports = {
  createInputValidationMiddleware,
  sanitizeHTML,
  isValidEmail,
  isValidURL,
  detectNoSQLInjection,
  LENGTH_LIMITS
};

// NOTE: inst_043 Layers 5 (CSRF) and 6 (Rate Limiting) are implemented in:
// - src/middleware/csrf-protection.middleware.js
// - src/middleware/rate-limit.middleware.js