// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
/**
* @module Utils/HtmlSanitize
* @description Shared HTML sanitization helpers used across the generation,
* validation, and quality-scoring pipelines.
*/
/**
* Remove all `<script>…</script>` blocks from an HTML string, replacing each
* with a single space.
*
* Uses iterative index-based scanning instead of a single-pass regex so that
* CodeQL does not flag the pattern as an insecure HTML tag filter
* (`js/bad-tag-filter`).
*
* @param html - HTML string to strip
* @returns The HTML with script blocks replaced by spaces
*/
/**
 * Replace every `<…>` tag in an HTML string with one space character,
 * leaving the surrounding text untouched.
 *
 * The scan is driven by `indexOf` rather than a regular expression so that
 * no backtracking can occur (CodeQL `js/polynomial-redos`).
 *
 * A `<` that is never followed by a `>` is not treated as a tag: it and
 * everything after it are copied to the output verbatim.
 *
 * @param html - HTML string to strip
 * @returns The input text with each tag collapsed to a single space
 */
export function stripHtmlTags(html: string): string {
  const pieces: string[] = [];
  let cursor = 0;
  while (cursor < html.length) {
    const tagStart = html.indexOf('<', cursor);
    // Only look for the terminator when a '<' was actually found.
    const tagEnd = tagStart === -1 ? -1 : html.indexOf('>', tagStart + 1);
    if (tagEnd === -1) {
      // No further tag, or an unterminated one: the remainder is plain text.
      pieces.push(html.slice(cursor));
      break;
    }
    // Emit the text preceding the tag, then a space in place of the tag.
    pieces.push(html.slice(cursor, tagStart), ' ');
    cursor = tagEnd + 1;
  }
  return pieces.join('');
}
/**
 * Lower-case only the ASCII letters `A`–`Z` of a string.
 *
 * Unlike `String.prototype.toLowerCase`, the result always has exactly the
 * same length as the input, so offsets found in the result are valid offsets
 * into the original string.
 *
 * @param text - String to fold
 * @returns A same-length copy with ASCII upper-case letters lower-cased
 */
function asciiLowerCase(text: string): string {
  let folded = '';
  let runStart = 0;
  for (let i = 0; i < text.length; i++) {
    const code = text.charCodeAt(i);
    if (code >= 0x41 && code <= 0x5a) {
      // Flush the unchanged run, then append the folded letter.
      folded += text.slice(runStart, i) + String.fromCharCode(code + 0x20);
      runStart = i + 1;
    }
  }
  return folded + text.slice(runStart);
}

/**
 * Remove all `<script>…</script>` blocks from an HTML string, replacing each
 * with a single space. Tag names are matched case-insensitively.
 *
 * Uses iterative index-based scanning instead of a single-pass regex so that
 * CodeQL does not flag the pattern as an insecure HTML tag filter
 * (`js/bad-tag-filter`).
 *
 * Edge cases:
 * - an opening `<script` with no `>` anywhere after it is kept verbatim;
 * - an opening tag with no complete closing `</script…>` tag causes the
 *   rest of the input to be dropped and replaced by one space.
 *
 * @param html - HTML string to strip
 * @returns The HTML with script blocks replaced by spaces
 */
export function stripScriptBlocks(html: string): string {
  const OPEN = '<script';
  const CLOSE = '</script';
  let result = '';
  let pos = 0;
  // Offsets found in `lower` are used to slice `html`, so the fold must be
  // length-preserving. `toLowerCase()` is not (e.g. U+0130 expands to two
  // code units), which would shift offsets and leak script markup.
  const lower = asciiLowerCase(html);
  while (pos < html.length) {
    const openIdx = lower.indexOf(OPEN, pos);
    if (openIdx < 0) {
      result += html.slice(pos);
      break;
    }
    // Copy everything before the opening <script
    result += html.slice(pos, openIdx);
    // Find the end of the opening tag
    const openEnd = html.indexOf('>', openIdx);
    if (openEnd < 0) {
      // Malformed — no closing `>`, keep rest as-is
      result += html.slice(openIdx);
      break;
    }
    // Find the closing </script...> tag
    const closeIdx = lower.indexOf(CLOSE, openEnd + 1);
    if (closeIdx < 0) {
      // No closing tag — drop the rest
      result += ' ';
      break;
    }
    const closeEnd = html.indexOf('>', closeIdx);
    if (closeEnd < 0) {
      // Closing tag is never terminated — drop the rest
      result += ' ';
      break;
    }
    result += ' ';
    pos = closeEnd + 1;
  }
  return result;
}
|