All files / src/utils html-sanitize.ts

100% Statements 41/41
100% Branches 12/12
100% Functions 2/2
100% Lines 41/41

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94                                                            21x 21x   21x 68x 68x 5x 5x     63x   63x 63x   2x 2x   61x 61x   21x       30x 30x 30x 30x 30x   30x 34x 34x 15x 15x     19x   19x 19x   1x 1x     18x 18x   1x 1x   17x 17x 1x 1x   16x 16x   30x    
// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
 
/**
 * @module Utils/HtmlSanitize
 * @description Shared HTML sanitization helpers used across the generation,
 * validation, and quality-scoring pipelines.
 */
 
/**
 * NOTE: this comment describes `stripScriptBlocks`, which is defined after
 * `stripHtmlTags` further down in this module; it does not describe the
 * function that immediately follows.
 *
 * Remove all `<script>…</script>` blocks from an HTML string, replacing each
 * with a single space.
 *
 * Uses iterative index-based scanning instead of a single-pass regex so that
 * CodeQL does not flag the pattern as an insecure HTML tag filter
 * (`js/bad-tag-filter`).
 *
 * @param html - HTML string to strip
 * @returns The HTML with script blocks replaced by spaces
 */
/**
 * Replace every `<…>` tag in a string with a single space, leaving the text
 * between tags untouched.
 *
 * The input is walked with `indexOf` rather than matched with a regular
 * expression, which avoids polynomial backtracking
 * (CodeQL `js/polynomial-redos`).
 *
 * A `<` that is never followed by a `>` is not treated as a tag: it and
 * everything after it are copied to the output unchanged.
 *
 * @param html - HTML string to strip
 * @returns The text content with tags replaced by spaces
 */
export function stripHtmlTags(html: string): string {
  const pieces: string[] = [];
  let cursor = 0;

  while (cursor < html.length) {
    const tagStart = html.indexOf('<', cursor);
    // The search for '>' starts one past the '<' so the bracket itself is skipped.
    const tagEnd = tagStart === -1 ? -1 : html.indexOf('>', tagStart + 1);

    if (tagEnd === -1) {
      // Either no further '<' exists or the last one is never closed; in both
      // cases the remainder of the input is emitted verbatim.
      pieces.push(html.slice(cursor));
      break;
    }

    // Text preceding the tag, then a single space standing in for the tag.
    pieces.push(html.slice(cursor, tagStart), ' ');
    cursor = tagEnd + 1;
  }

  return pieces.join('');
}
 
/**
 * Remove every `<script…>…</script…>` block from an HTML string, replacing each
 * removed block with a single space.
 *
 * Tag names are matched ASCII case-insensitively. The input is walked with
 * `indexOf` rather than matched with a single tag-filtering regex, so CodeQL
 * does not flag the code as an insecure HTML tag filter (`js/bad-tag-filter`).
 *
 * Malformed input is handled as follows:
 * - an opening `<script` with no `>` after it: the rest of the input is kept;
 * - an opening tag with no closing `</script`, or a closing `</script` with no
 *   `>` after it: the rest of the input is dropped and replaced by one space.
 *
 * @param html - HTML string to strip
 * @returns The HTML with script blocks replaced by spaces
 */
export function stripScriptBlocks(html: string): string {
  const OPEN = '<script';
  const CLOSE = '</script';
  // Fold only ASCII A-Z. `String.prototype.toLowerCase()` can change the string
  // length (U+0130 'İ' lower-cases to two code units), which would make offsets
  // found in the folded copy point at the wrong place in `html` and leak script
  // content into the output. This fold is length-preserving, so offsets found in
  // `lower` are valid offsets into `html`.
  const lower = html.replace(/[A-Z]/g, (ch) => ch.toLowerCase());
  let result = '';
  let pos = 0;

  while (pos < html.length) {
    const openIdx = lower.indexOf(OPEN, pos);
    if (openIdx < 0) {
      // No further script blocks: keep the remaining input.
      result += html.slice(pos);
      break;
    }
    // Copy everything before the opening <script
    result += html.slice(pos, openIdx);
    // Find the end of the opening tag
    const openEnd = html.indexOf('>', openIdx);
    if (openEnd < 0) {
      // Malformed: no closing `>`, keep rest as-is
      result += html.slice(openIdx);
      break;
    }
    // Find the closing </script...> tag
    const closeIdx = lower.indexOf(CLOSE, openEnd + 1);
    if (closeIdx < 0) {
      // No closing tag: drop the rest
      result += ' ';
      break;
    }
    const closeEnd = html.indexOf('>', closeIdx);
    if (closeEnd < 0) {
      // Closing tag never terminated: drop the rest
      result += ' ';
      break;
    }
    // The whole block collapses to a single space; resume after the closing `>`.
    result += ' ';
    pos = closeEnd + 1;
  }
  return result;
}