All files / src/aggregator/html localize-body.ts

94.49% Statements 103/109
73.21% Branches 41/56
100% Functions 6/6
100% Lines 97/97

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344                                                                                            15x                                 75x   62x   62x 62x             62x 62x 62x 62x 62x 62x 62x 62x       62x       62x 62x         62x           62x 62x         62x           62x 62x   62x 62x 62x 62x 62x 62x 62x 62x 62x 62x 62x 62x       62x 62x           75x 75x 75x 62x 62x 62x 62x       62x 62x 62x 62x         62x 62x   62x 62x             62x                                       248x 248x 42x                                               9x 9x 45x 9x 9x 9x   9x 9x 36x   9x                                                                                   10x 10x 10x     9x 9x     9x 9x 9x             9x   9x 1x       8x 8x 8x 
8x       9x 9x       10x                                     248x 248x 248x 82x 82x   248x   166x 166x   166x 166x 166x   166x 166x   166x    
// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
// SPDX-License-Identifier: Apache-2.0
 
/**
 * @module Aggregator/Html/LocalizeBody
 * @description Localize the Tradecraft References and Analysis Index
 * appendix sections inside the rendered article body. Replaces English
 * headings, intro paragraphs, sub-headings, and table column headers
 * with their translated equivalents, using indexOf-based search to stay
 * within CodeQL's safe-regex envelope.
 */
 
import {
  TRADECRAFT_HEADING_LABELS,
  TRADECRAFT_INTRO_LABELS,
  TRADECRAFT_METHODOLOGIES_LABELS,
  TRADECRAFT_TEMPLATES_LABELS,
  ANALYSIS_INDEX_HEADING_LABELS,
  ANALYSIS_INDEX_INTRO_LABELS,
  ANALYSIS_INDEX_COL_SECTION_LABELS,
  ANALYSIS_INDEX_COL_ARTIFACT_LABELS,
  ANALYSIS_INDEX_COL_PATH_LABELS,
  KEY_TAKEAWAYS_HEADING_LABELS,
  SUPPLEMENTARY_HEADING_LABELS,
  getLocalizedString,
} from '../../constants/languages.js';
import { escapeHTML } from '../../utils/file-utils.js';
import type { LanguageCode } from '../../types/index.js';
import {
  TRADECRAFT_SECTION_ID,
  MANIFEST_SECTION_ID,
  SUPPLEMENTARY_SECTION_ID,
} from '../artifact-order.js';
import { KEY_TAKEAWAYS_SECTION_ID } from '../key-takeaways.js';
import { READER_GUIDE_SECTION_ID } from '../reader-guide-constants.js';
 
/**
 * `id="…"` attribute markers for the non-canonical top-level sections that
 * can terminate the Executive Brief body: the Reader Guide plus the
 * Tradecraft, Analysis Index (manifest) and Supplementary appendices.
 *
 * Canonical analysis sections are recognised separately through their
 * shared `id="section-…"` prefix (see {@link findExecutiveBriefSectionCut}).
 * The sections listed here carry bespoke ids without that prefix, so each
 * one has to be matched by its full marker. Listing them lets the localized
 * brief splice still find a boundary on sparse runs where the Executive
 * Brief is the last canonical section and only appendix blocks follow.
 */
const EXECUTIVE_BRIEF_BOUNDARY_ID_MARKERS: readonly string[] = [
  READER_GUIDE_SECTION_ID,
  TRADECRAFT_SECTION_ID,
  MANIFEST_SECTION_ID,
  SUPPLEMENTARY_SECTION_ID,
].map((sectionId) => `id="${sectionId}"`);
 
/**
 * Localize the generated scaffolding inside the rendered article body:
 * the Tradecraft References appendix (heading, intro paragraph,
 * "Methodologies" / "Artifact templates" sub-headings), the Analysis Index
 * appendix (heading, intro sentence, table column headers), and the
 * Key Takeaways and Supplementary Intelligence headings.
 *
 * Every step is a literal `indexOf`-based search for the known English
 * text; a step whose English anchor is absent is silently skipped, so the
 * function is safe to call on bodies that lack some or all of these blocks.
 * All substitutions are literal (no regex and no `$`-pattern expansion).
 *
 * @param bodyHtml - The rendered HTML body (from Markdown)
 * @param lang - Target language code; `'en'` returns `bodyHtml` untouched
 * @returns HTML body with the appendix scaffolding localized
 */
export function localizeArticleBody(bodyHtml: string, lang: LanguageCode): string {
  // English is the source language — nothing to translate.
  if (lang === 'en') return bodyHtml;

  let html = bodyHtml;

  // ── Tradecraft References: section heading ────────────────────────────
  const tradecraftHeading = getLocalizedString(TRADECRAFT_HEADING_LABELS, lang);
  html = replaceHeadingById(
    html,
    TRADECRAFT_SECTION_ID,
    'Tradecraft References',
    tradecraftHeading
  );

  // ── Tradecraft References: intro paragraph ────────────────────────────
  // Replace from the start of the known English sentence up to (but not
  // including) the paragraph's closing tag.
  const tradecraftIntroRaw = getLocalizedString(TRADECRAFT_INTRO_LABELS, lang);
  const introSentenceStart = 'This article is produced under the ';
  const introIdx = html.indexOf(introSentenceStart);
  if (introIdx !== -1) {
    const sentenceEnd = html.indexOf('</p>', introIdx);
    if (sentenceEnd !== -1) {
      // Escape the translated sentence first, then re-introduce the one
      // trusted link around the (escaped) company name.
      const localizedWithLink = replaceFirstStringIn(
        escapeHTML(tradecraftIntroRaw),
        escapeHTML('Hack23 AB'),
        '<a href="https://hack23.com">Hack23 AB</a>'
      );
      html = html.slice(0, introIdx) + localizedWithLink + html.slice(sentenceEnd);
    }
  }

  // ── Tradecraft References: sub-headings ───────────────────────────────
  // Each label may be rendered either as a <span> or as an <h3>; try both,
  // in that order, for each label.
  const subHeadings: ReadonlyArray<readonly [string, string]> = [
    ['Methodologies', getLocalizedString(TRADECRAFT_METHODOLOGIES_LABELS, lang)],
    ['Artifact templates', getLocalizedString(TRADECRAFT_TEMPLATES_LABELS, lang)],
  ];
  for (const [englishLabel, localizedLabel] of subHeadings) {
    const escapedLabel = escapeHTML(localizedLabel);
    for (const tag of ['span', 'h3']) {
      html = replaceFirstStringIn(
        html,
        `<${tag}>${englishLabel}</${tag}>`,
        `<${tag}>${escapedLabel}</${tag}>`
      );
    }
  }

  // ── Analysis Index: section heading ───────────────────────────────────
  const analysisIndexHeading = getLocalizedString(ANALYSIS_INDEX_HEADING_LABELS, lang);
  html = replaceHeadingById(html, MANIFEST_SECTION_ID, 'Analysis Index', analysisIndexHeading);

  // ── Analysis Index: intro sentence ────────────────────────────────────
  // Recover the manifest URL from the existing English markup so the
  // translated sentence can link to the same target: take the nearest
  // `href="` that precedes the first "manifest.json" occurrence, provided
  // it is close enough to plausibly belong to the same anchor tag.
  const analysisIndexIntroRaw = getLocalizedString(ANALYSIS_INDEX_INTRO_LABELS, lang);
  const manifestLinkPrefix = 'href="';
  const manifestJsonLiteral = 'manifest.json';
  const maxHrefLookback = 200;
  const manifestLinkIdx = html.indexOf(manifestJsonLiteral);
  let manifestUrl = '';
  if (manifestLinkIdx !== -1) {
    const hrefIdx = html.lastIndexOf(manifestLinkPrefix, manifestLinkIdx);
    if (hrefIdx !== -1 && manifestLinkIdx - hrefIdx < maxHrefLookback) {
      const urlStart = hrefIdx + manifestLinkPrefix.length;
      const urlEnd = html.indexOf('"', urlStart);
      if (urlEnd !== -1) {
        manifestUrl = html.slice(urlStart, urlEnd);
      }
    }
  }
  const escapedAnalysisIntro = escapeHTML(analysisIndexIntroRaw);
  // Literal replacement: the URL is data taken from the document and must
  // never be interpreted as a `$&` / `$1`-style substitution pattern.
  // NOTE(review): manifestUrl is sliced out of already-rendered HTML and is
  // escaped again here — confirm escapeHTML does not double-encode entities
  // such as `&amp;` for URLs that contain `&`.
  const localizedIntroWithLink = manifestUrl
    ? replaceFirstStringIn(
        escapedAnalysisIntro,
        manifestJsonLiteral,
        `<a href="${escapeHTML(manifestUrl)}">manifest.json</a>`
      )
    : escapedAnalysisIntro;
  const analysisIntroStart = 'Every artifact below was read by the aggregator';
  const analysisIntroEndText = 'gate-result history.';
  const analysisIntroIdx = html.indexOf(analysisIntroStart);
  if (analysisIntroIdx !== -1) {
    const analysisIntroEnd = html.indexOf(analysisIntroEndText, analysisIntroIdx);
    if (analysisIntroEnd !== -1) {
      const endOffset = analysisIntroEnd + analysisIntroEndText.length;
      html = html.slice(0, analysisIntroIdx) + localizedIntroWithLink + html.slice(endOffset);
    }
  }

  // ── Analysis Index: table column headers ──────────────────────────────
  const colSection = getLocalizedString(ANALYSIS_INDEX_COL_SECTION_LABELS, lang);
  const colArtifact = getLocalizedString(ANALYSIS_INDEX_COL_ARTIFACT_LABELS, lang);
  const colPath = getLocalizedString(ANALYSIS_INDEX_COL_PATH_LABELS, lang);
  // Literal replacement so a `$` inside a translated label is emitted as-is.
  html = replaceFirstStringIn(
    html,
    '<th>Section</th><th>Artifact</th><th>Path</th>',
    `<th>${escapeHTML(colSection)}</th><th>${escapeHTML(colArtifact)}</th><th>${escapeHTML(colPath)}</th>`
  );

  // ── Key Takeaways / Supplementary Intelligence headings ───────────────
  const keyTakeawaysHeading = getLocalizedString(KEY_TAKEAWAYS_HEADING_LABELS, lang);
  html = replaceHeadingById(html, KEY_TAKEAWAYS_SECTION_ID, 'Key Takeaways', keyTakeawaysHeading);

  const supplementaryHeading = getLocalizedString(SUPPLEMENTARY_HEADING_LABELS, lang);
  html = replaceHeadingById(
    html,
    SUPPLEMENTARY_SECTION_ID,
    'Supplementary Intelligence',
    supplementaryHeading
  );

  return html;
}
 
/**
 * Swap the first literal occurrence of `needle` inside `haystack` for
 * `replacement`.
 *
 * The search is a plain `indexOf`, not `String.prototype.replace`, for two
 * reasons: it keeps the module clear of the security/detect-unsafe-regex
 * lint rule, and it guarantees that neither regex metacharacters in
 * `needle` nor `$1`-style sequences in `replacement` are ever interpreted.
 *
 * @param haystack - String to search in
 * @param needle - Literal substring to look for
 * @param replacement - Literal text to insert (no `$` escaping needed)
 * @returns The modified string, or `haystack` itself when `needle` is absent
 */
export function replaceFirstStringIn(
  haystack: string,
  needle: string,
  replacement: string
): string {
  const matchStart = haystack.indexOf(needle);
  if (matchStart < 0) {
    return haystack;
  }
  const before = haystack.substring(0, matchStart);
  const after = haystack.substring(matchStart + needle.length);
  return `${before}${replacement}${after}`;
}
 
/**
 * Locate the cut point that ends the Executive Brief body — the start of
 * the first top-level boundary heading at or after `afterHeading`. A
 * boundary is an `<h2>` element that carries an `id` either starting with
 * the canonical `section-` prefix or matching one of
 * {@link EXECUTIVE_BRIEF_BOUNDARY_ID_MARKERS} (Reader Guide / Tradecraft /
 * Analysis Index / Supplementary appendices).
 *
 * Critically, this only matches **top-level** section anchors — never the
 * brief's own internal `<h2>` sub-headings (`## BLUF`, `## 60-Second Read`,
 * …), which carry slugified ids without the `section-` prefix. That is why
 * we cannot simply look for the next `<h2`.
 *
 * A marker occurrence only counts when it sits inside an `<h2>` element
 * that begins at or after `afterHeading` (i.e. no `</h2>` lies between that
 * `<h2` and the marker). Occurrences that fail this check — for example an
 * `id="section-…"` on a non-heading element inside the brief body — are
 * skipped and the scan continues with the next occurrence, so a stray id
 * can neither hide a later real boundary nor promote one of the brief's
 * internal sub-headings to a boundary.
 *
 * Uses `indexOf`/`lastIndexOf` exclusively (no regex) to stay within
 * CodeQL's safe-regex envelope.
 *
 * @param html - Full article body HTML
 * @param afterHeading - Index immediately after the Executive Brief `</h2>`
 * @returns Index of the next boundary `<h2`, or `-1` when the Executive
 *          Brief is the last block in the body.
 */
export function findExecutiveBriefSectionCut(html: string, afterHeading: number): number {
  const headingClose = '</h2>';

  // Index of the earliest qualifying `<h2` for one marker, or -1 if none.
  const firstBoundaryFor = (marker: string): number => {
    let from = afterHeading;
    for (;;) {
      const markerIdx = html.indexOf(marker, from);
      if (markerIdx === -1) return -1;

      // Walk back to the nearest `<h2` and make sure the marker still
      // belongs to it: the heading must start after the brief's own
      // heading and must not have been closed before the marker.
      const h2 = html.lastIndexOf('<h2', markerIdx);
      const insideLaterHeading =
        h2 !== -1 && h2 >= afterHeading && html.lastIndexOf(headingClose, markerIdx) < h2;
      if (insideLaterHeading) return h2;

      // Not a heading id — keep looking past this occurrence.
      from = markerIdx + marker.length;
    }
  };

  let best = -1;
  for (const marker of ['id="section-', ...EXECUTIVE_BRIEF_BOUNDARY_ID_MARKERS]) {
    const h2 = firstBoundaryFor(marker);
    if (h2 !== -1 && (best === -1 || h2 < best)) best = h2;
  }
  return best;
}
 
/**
 * Replace the Executive Brief section — the
 * `<h2 id="section-executive-brief">…</h2>` heading and everything that
 * follows it up to, but not including, the next top-level boundary
 * heading — with a localized heading plus the supplied replacement HTML.
 * The heading is re-emitted with the same `id`, so the in-page anchor
 * (`#section-executive-brief`) and the table-of-contents link continue to
 * work.
 *
 * Used by the article-generator HTML pipeline to inject the rendered
 * markdown of a translated `executive-brief_<lang>.md` into the
 * non-English language variants without forking the whole aggregated
 * article into 14 source-language copies — see
 * `editorial-brief-resolver.readLocalizedBriefBody` and
 * `render-one.writeLanguageVariant`.
 *
 * Implementation uses `indexOf`/slice exclusively to stay within
 * CodeQL's safe-regex envelope. The replaced span runs from the heading to
 * the next top-level boundary (see {@link findExecutiveBriefSectionCut});
 * when the Executive Brief is the last block in the body the span extends
 * to end-of-body. Returns `html` unchanged only when the Executive Brief
 * heading is absent or malformed (no opening `<h2` before the id, or no
 * closing `</h2>` after it).
 *
 * @param html - Full article body HTML
 * @param localizedHeading - Localized text for the Executive Brief H2
 *                           (e.g. `"Sammanfattning"` for `sv`). Pass plain,
 *                           un-escaped text: this helper runs it through
 *                           `escapeHTML` itself, so pre-escaping would
 *                           double-encode it.
 * @param replacementBodyHtml - HTML to splice in **after** the heading;
 *                              inserted verbatim (not escaped). Should not
 *                              contain its own `<h2>` for the Executive
 *                              Brief — the heading is re-emitted by this
 *                              helper.
 * @returns Updated HTML with the localized brief body in place.
 */
export function replaceExecutiveBriefSection(
  html: string,
  localizedHeading: string,
  replacementBodyHtml: string
): string {
  const idMarker = 'id="section-executive-brief"';
  const idIdx = html.indexOf(idMarker);
  if (idIdx === -1) return html;

  // Walk back to the opening `<h2` of the Executive Brief heading.
  const h2Open = html.lastIndexOf('<h2', idIdx);
  if (h2Open === -1) return html;

  // Find the end of the heading element.
  const headingClose = '</h2>';
  const h2CloseTagIdx = html.indexOf(headingClose, idIdx);
  if (h2CloseTagIdx === -1) return html;
  const afterHeading = h2CloseTagIdx + headingClose.length;

  // Find the next top-level boundary heading — the start of the following
  // article section or appendix. When none exists the Executive Brief is
  // the last block, so we replace through end-of-body. This guarantees the
  // localized brief is spliced even on sparse runs.
  const nextH2 = findExecutiveBriefSectionCut(html, afterHeading);
  let cutEnd = html.length;
  if (nextH2 !== -1) {
    // Prefer the start of the line containing the next `<h2` so we don't
    // strip leading whitespace from the next section; fall back to the
    // `<h2` itself when there is no newline between the brief heading and
    // the boundary.
    const prevNewline = html.lastIndexOf('\n', nextH2 - 1);
    cutEnd = prevNewline !== -1 && prevNewline >= afterHeading ? prevNewline + 1 : nextH2;
  }

  const newHeading = `<h2 id="section-executive-brief">${escapeHTML(localizedHeading)}</h2>\n`;
  // Guarantee exactly the separation the next section expects: the
  // replacement always ends with a newline before the following content.
  const replacementWithNewline = replacementBodyHtml.endsWith('\n')
    ? replacementBodyHtml
    : `${replacementBodyHtml}\n`;

  return html.slice(0, h2Open) + newHeading + replacementWithNewline + html.slice(cutEnd);
}
 
/**
 * Replace a heading's text content by locating it via its `id` attribute.
 * Uses indexOf-based search to avoid polynomial regex backtracking (CodeQL).
 *
 * The element is found by the literal attribute `id="<sectionId>"`
 * (falling back to the single-quoted form `id='<sectionId>'`). The text
 * between the end of that opening tag and the next `<` is treated as the
 * current title. The replacement is deliberately conservative — `html` is
 * returned unchanged when:
 * - no element with that id exists,
 * - the opening tag is never closed or no `<` follows it, or
 * - the current title (whitespace-trimmed) is not exactly `englishTitle`,
 *   e.g. because it was already localized or is wrapped in a child element.
 *
 * @param html - Full HTML string
 * @param sectionId - The id attribute value of the target `<h2>`
 * @param englishTitle - The English title text expected in the heading
 * @param localizedTitle - The localized title to insert; plain text, it is
 *                         passed through `escapeHTML` here
 * @returns Updated HTML string, or `html` unchanged when nothing matched
 */
export function replaceHeadingById(
  html: string,
  sectionId: string,
  englishTitle: string,
  localizedTitle: string
): string {
  // Prefer the double-quoted attribute; accept single quotes as a fallback.
  let idIdx = html.indexOf(`id="${sectionId}"`);
  if (idIdx === -1) {
    idIdx = html.indexOf(`id='${sectionId}'`);
  }
  if (idIdx === -1) return html;

  // End of the opening tag that carries the id.
  const tagCloseIdx = html.indexOf('>', idIdx);
  if (tagCloseIdx === -1) return html;

  // The title is the text run up to the next tag (normally the closing tag).
  const titleStart = tagCloseIdx + 1;
  const titleEnd = html.indexOf('<', titleStart);
  if (titleEnd === -1) return html;

  // Only overwrite the exact English title we expect to find.
  const existingTitle = html.slice(titleStart, titleEnd);
  if (existingTitle.trim() !== englishTitle) return html;

  return html.slice(0, titleStart) + escapeHTML(localizedTitle) + html.slice(titleEnd);
}