Skip to content

Commit 727fe55

Browse files
committed
fix(web): decode multi-level HTML entities in display text
Add a shared utility that decodes HTML entities iteratively, handling double- and triple-encoded entities such as &amp;amp; → &. It is applied to RelationshipItem display names and to EntityDataDisplay text rendering, because OpenAlex API data sometimes contains double-encoded entities.
1 parent 60cbb00 commit 727fe55

File tree

3 files changed

+72
-19
lines changed

3 files changed

+72
-19
lines changed

apps/web/src/components/EntityDataDisplay.tsx

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -36,27 +36,11 @@ import { Link } from "@tanstack/react-router";
3636

3737
import { ICON_SIZE } from "@/config/style-constants";
3838
import { useVersionComparison } from "@/hooks/use-version-comparison";
39+
import { decodeHtmlEntities } from "@/utils/decode-html-entities";
3940
import { humanizeFieldName } from "@/utils/field-labels";
4041
import { formatNumber } from "@/utils/format-number";
4142
import { convertOpenAlexToInternalLink, isOpenAlexId } from "@/utils/openalex-link-conversion";
4243

43-
/**
44-
* Decode HTML entities in text.
45-
* Handles common entities like &amp;, &lt;, &gt;, &quot;, etc.
*
* Only the entities named in the regex below are replaced; anything else is
* left untouched. The text is scanned a single time, so double-encoded input
* such as "&amp;amp;" comes out as "&amp;" rather than "&".
*
* @param text - The text that may contain HTML entities
* @returns The text after one pass of entity replacement
46-
*/
47-
const decodeHtmlEntities = (text: string): string => {
48-
const entities: Record<string, string> = {
49-
"&amp;": "&",
50-
"&lt;": "<",
51-
"&gt;": ">",
52-
"&quot;": '"',
53-
"&#39;": "'",
54-
"&apos;": "'",
55-
"&nbsp;": " ",
56-
};
57-
return text.replaceAll(/&(?:amp|lt|gt|quot|apos|nbsp|#39);/g, (match) => entities[match] ?? match);
58-
};
59-
6044
/** Section priority for consistent ordering */
6145
const SECTION_PRIORITY: Record<string, number> = {
6246
Identifiers: 1,

apps/web/src/components/relationship/RelationshipItem.tsx

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import { useNavigate } from '@tanstack/react-router';
1010
import React from 'react';
1111

1212
import type { RelationshipItem as RelationshipItemType } from '@/types/relationship';
13+
import { decodeHtmlEntities } from '@/utils/decode-html-entities';
1314
import { formatMetadata } from '@/utils/formatMetadata';
1415

1516
export interface RelationshipItemProps {
@@ -75,7 +76,7 @@ export const RelationshipItem: React.FC<RelationshipItemProps> = ({ item }) => {
7576
<Stack gap="xs" data-testid={`relationship-item-${item.id}`}>
7677
<Group gap="xs">
7778
<Anchor href={entityUrl} onClick={handleClick} size="sm">
78-
{item.displayName || cleanEntityId}
79+
{decodeHtmlEntities(item.displayName || cleanEntityId)}
7980
</Anchor>
8081
{item.isSelfReference && (
8182
<Text size="xs" c="dimmed">
@@ -85,7 +86,7 @@ export const RelationshipItem: React.FC<RelationshipItemProps> = ({ item }) => {
8586
</Group>
8687
{item.subtitle && (
8788
<Text size="xs" c="dimmed" data-testid="relationship-subtitle">
88-
{item.subtitle}
89+
{decodeHtmlEntities(item.subtitle)}
8990
</Text>
9091
)}
9192
{item.metadata && (
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
/**
 * HTML Entity Decoding Utility
 *
 * Decodes a fixed set of HTML entities in display text, including text that
 * has been encoded more than once (e.g., &amp;amp; -> &amp; -> &).
 */

/**
 * Replacement text for every entity this module decodes.
 *
 * The keys must stay in sync with the alternatives listed in ENTITY_PATTERN:
 * an entity matched by the pattern but missing here is left unchanged.
 */
const HTML_ENTITIES: Record<string, string> = {
  "&amp;": "&",
  "&lt;": "<",
  "&gt;": ">",
  "&quot;": '"',
  "&#39;": "'",
  "&apos;": "'",
  "&nbsp;": " ",
};

/**
 * Matches exactly the entities that have an entry in HTML_ENTITIES.
 * The global flag makes a single pass rewrite every occurrence.
 */
const ENTITY_PATTERN = /&(?:amp|lt|gt|quot|apos|nbsp|#39);/g;

/**
 * Upper bound on the number of decoding passes.
 *
 * Each pass strips one level of encoding, so text nested more deeply than
 * this is returned partially decoded.
 */
const MAX_DECODE_ITERATIONS = 5;

/**
 * Decode HTML entities in text, handling multiple levels of encoding.
 *
 * Some data sources double- or triple-encode HTML entities
 * (e.g., "&amp;amp;" should become "&", not "&amp;"). The text is decoded
 * repeatedly until a pass changes nothing or MAX_DECODE_ITERATIONS passes
 * have run. Entities outside the supported set (e.g., "&copy;") are left
 * as they are.
 *
 * @param text - The text containing HTML entities
 * @returns The text with the supported entities decoded
 *
 * @example
 * decodeHtmlEntities("&amp;")             // "&"
 * decodeHtmlEntities("&amp;amp;")         // "&" (double-encoded)
 * decodeHtmlEntities("&amp;amp;amp;")     // "&" (triple-encoded)
 * decodeHtmlEntities("Foo &amp;amp; Bar") // "Foo & Bar"
 */
export const decodeHtmlEntities = (text: string): string => {
  let result = text;

  for (let pass = 0; pass < MAX_DECODE_ITERATIONS; pass++) {
    // With a global regex, replace() rewrites every match in one call.
    const decoded = result.replace(
      ENTITY_PATTERN,
      (match) => HTML_ENTITIES[match] ?? match
    );

    // A pass that changes nothing means there is nothing left to decode.
    if (decoded === result) {
      break;
    }

    result = decoded;
  }

  return result;
};

0 commit comments

Comments
 (0)