hub / github.com/webpack/webpack / decodeOneReference

Function decodeOneReference

lib/html/syntax.js:3626–3698 · view source on GitHub ↗

(match, nextCharCode, isAttribute)

Source from the content-addressed store, hash-verified

3624	* @returns {string} decoded text, or `match` itself when it stays literal
3625	*/
3626	const decodeOneReference = (match, nextCharCode, isAttribute) => {
3627	// Numeric reference: decimal `&#65;` or hexadecimal `&#x41;`
3628	if (match.charCodeAt(1) === 0x23 /* # */) {
3629	const lastChar = match.charAt(match.length - 1);
3630	const isHex = match.charCodeAt(2) === 0x78 || match.charCodeAt(2) === 0x58;
3631	const body = isHex
3632	? lastChar === ";"
3633	? match.slice(3, -1)
3634	: match.slice(3)
3635	: lastChar === ";"
3636	? match.slice(2, -1)
3637	: match.slice(2);
3638	// The regex above guarantees at least one digit in `body`,
3639	// so `parseInt` always returns a finite number here.
3640	return decodeNumericReference(Number.parseInt(body, isHex ? 16 : 10));
3641	}
3642
3643	// Named reference. Try the full captured name first, then
3644	// progressively shorter prefixes - this handles direct matches
3645	// like `&amp;` as well as WHATWG longest-prefix semantics where
3646	// e.g. `&notpre;` decodes as `&not` (a legacy bare entity)
3647	// followed by `pre;` as literal text.
3648	const name = match.slice(1);
3649	const matchEndsWithSemi = name.charCodeAt(name.length - 1) === 0x3b;
3650
3651	// Attribute-context guard: if the entity match didn't end with `;`
3652	// and the next character in the source is `=` or ASCII
3653	// alphanumeric, the WHATWG spec says to flush the literal text
3654	// rather than decode. The greedy regex already absorbed any
3655	// trailing alphanumerics, so the only candidate "next char" here
3656	// is `=` (or any non-alphanumeric).
3657	if (isAttribute && !matchEndsWithSemi && nextCharCode === 0x3d /* = */) {
3658	return match;
3659	}
3660
3661	// Fast path: the regex usually captures exactly one entity (`&amp;`,
3662	// `&lt;`, `&nbsp;`, …), so the whole `name` is the match — avoid the
3663	// full-length `name.slice(0, name.length)` the loop's first iteration
3664	// would allocate. No leftover, so the attribute guard never applies.
3665	if (name.length <= MAX_ENTITY_NAME_LEN) {
3666	const full = HTML_ENTITIES[name];
3667	if (full !== undefined) return full;
3668	}
3669
3670	// Cap the longest-prefix search at MAX_ENTITY_NAME_LEN so pathological
3671	// inputs like `&` + thousands of alphanumerics stay linear-time.
3672	// Anything past that cap can't possibly match and is appended
3673	// verbatim as part of `name.slice(i)`. The full-length case was just
3674	// handled above, so start one shorter when it's the cap.
3675	const searchLen =
3676	name.length > MAX_ENTITY_NAME_LEN ? MAX_ENTITY_NAME_LEN : name.length - 1;
3677	for (let i = searchLen; i > 0; i--) {
3678	const prefix = name.slice(0, i);
3679	if (HTML_ENTITIES[prefix] !== undefined) {
3680	// Attribute-context longest-prefix guard: if the matched
3681	// prefix doesn't end with `;` and the leftover starts with
3682	// an alphanumeric character, leave literal per WHATWG.
3683	// (The regex greedy-consumes alphanumerics, so any leftover

Callers 2

decodeHtmlEntitiesFunction · 0.85

decodeHtmlEntitiesWithMapFunction · 0.85

Calls 2

decodeNumericReferenceFunction · 0.85

sliceMethod · 0.80

Tested by

no test coverage detected