MCPcopy
hub / github.com/webpack/webpack / decodeOneReference

Function decodeOneReference

lib/html/syntax.js:3626–3698  ·  view source on GitHub ↗
(match, nextCharCode, isAttribute)

Source from the content-addressed store, hash-verified

3624 * @returns {string} decoded text, or `match` itself when it stays literal
3625 */
3626const decodeOneReference = (match, nextCharCode, isAttribute) => {
3627 // Numeric reference: &#65; or &#x41;
3628 if (match.charCodeAt(1) === 0x23 /* # */) {
3629 const lastChar = match.charAt(match.length - 1);
3630 const isHex = match.charCodeAt(2) === 0x78 || match.charCodeAt(2) === 0x58;
3631 const body = isHex
3632 ? lastChar === ";"
3633 ? match.slice(3, -1)
3634 : match.slice(3)
3635 : lastChar === ";"
3636 ? match.slice(2, -1)
3637 : match.slice(2);
3638 // The regex above guarantees at least one digit in `body`,
3639 // so `parseInt` always returns a finite number here.
3640 return decodeNumericReference(Number.parseInt(body, isHex ? 16 : 10));
3641 }
3642
3643 // Named reference. Try the full captured name first, then
3644 // progressively shorter prefixes - this handles direct matches
3645 // like `&amp;` as well as WHATWG longest-prefix semantics where
3646 // e.g. `&notpre;` decodes as `&not` (a legacy bare entity)
3647 // followed by `pre;` as literal text.
3648 const name = match.slice(1);
3649 const matchEndsWithSemi = name.charCodeAt(name.length - 1) === 0x3b;
3650
3651 // Attribute-context guard: if the entity match didn't end with `;`
3652 // and the next character in the source is `=` or ASCII
3653 // alphanumeric, the WHATWG spec says to flush the literal text
3654 // rather than decode. The greedy regex already absorbed any
3655 // trailing alphanumerics, so the only candidate "next char" here
3656 // is `=` (or any non-alphanumeric).
3657 if (isAttribute && !matchEndsWithSemi && nextCharCode === 0x3d /* = */) {
3658 return match;
3659 }
3660
3661 // Fast path: the regex usually captures exactly one entity (`&amp;`,
3662 // `&lt;`, `&nbsp;`, …), so the whole `name` is the match — avoid the
3663 // full-length `name.slice(0, name.length)` the loop's first iteration
3664 // would allocate. No leftover, so the attribute guard never applies.
3665 if (name.length <= MAX_ENTITY_NAME_LEN) {
3666 const full = HTML_ENTITIES[name];
3667 if (full !== undefined) return full;
3668 }
3669
3670 // Cap the longest-prefix search at MAX_ENTITY_NAME_LEN so pathological
3671 // inputs like `&` + thousands of alphanumerics stay linear-time.
3672 // Anything past that cap can't possibly match and is appended
3673 // verbatim as part of `name.slice(i)`. The full-length case was just
3674 // handled above, so start one shorter when it's the cap.
3675 const searchLen =
3676 name.length > MAX_ENTITY_NAME_LEN ? MAX_ENTITY_NAME_LEN : name.length - 1;
3677 for (let i = searchLen; i > 0; i--) {
3678 const prefix = name.slice(0, i);
3679 if (HTML_ENTITIES[prefix] !== undefined) {
3680 // Attribute-context longest-prefix guard: if the matched
3681 // prefix doesn't end with `;` and the leftover starts with
3682 // an alphanumeric character, leave literal per WHATWG.
3683 // (The regex greedy-consumes alphanumerics, so any leftover

Callers 2

decodeHtmlEntitiesFunction · 0.85

Calls 2

decodeNumericReferenceFunction · 0.85
sliceMethod · 0.80

Tested by

no test coverage detected