(match, nextCharCode, isAttribute)
| 3624 | * @returns {string} decoded text, or `match` itself when it stays literal |
| 3625 | */ |
| 3626 | const decodeOneReference = (match, nextCharCode, isAttribute) => { |
| 3627 | // Numeric reference: &#65; or &#x41; |
| 3628 | if (match.charCodeAt(1) === 0x23 /* # */) { |
| 3629 | const lastChar = match.charAt(match.length - 1); |
| 3630 | const isHex = match.charCodeAt(2) === 0x78 || match.charCodeAt(2) === 0x58; |
| 3631 | const body = isHex |
| 3632 | ? lastChar === ";" |
| 3633 | ? match.slice(3, -1) |
| 3634 | : match.slice(3) |
| 3635 | : lastChar === ";" |
| 3636 | ? match.slice(2, -1) |
| 3637 | : match.slice(2); |
| 3638 | // The regex above guarantees at least one digit in `body`, |
| 3639 | // so `parseInt` always returns a finite number here. |
| 3640 | return decodeNumericReference(Number.parseInt(body, isHex ? 16 : 10)); |
| 3641 | } |
| 3642 | |
| 3643 | // Named reference. Try the full captured name first, then |
| 3644 | // progressively shorter prefixes - this handles direct matches |
| 3645 | // like `&amp;` as well as WHATWG longest-prefix semantics where |
| 3646 | // e.g. `&notpre;` decodes as `¬` (a legacy bare entity) |
| 3647 | // followed by `pre;` as literal text. |
| 3648 | const name = match.slice(1); |
| 3649 | const matchEndsWithSemi = name.charCodeAt(name.length - 1) === 0x3b; |
| 3650 | |
| 3651 | // Attribute-context guard: if the entity match didn't end with `;` |
| 3652 | // and the next character in the source is `=` or ASCII |
| 3653 | // alphanumeric, the WHATWG spec says to flush the literal text |
| 3654 | // rather than decode. The greedy regex already absorbed any |
| 3655 | // trailing alphanumerics, so the only candidate "next char" here |
| 3656 | // is `=` (or any non-alphanumeric). |
| 3657 | if (isAttribute && !matchEndsWithSemi && nextCharCode === 0x3d /* = */) { |
| 3658 | return match; |
| 3659 | } |
| 3660 | |
| 3661 | // Fast path: the regex usually captures exactly one entity (`&amp;`, |
| 3662 | // `&lt;`, `&nbsp;`, …), so the whole `name` is the match — avoid the |
| 3663 | // full-length `name.slice(0, name.length)` the loop's first iteration |
| 3664 | // would allocate. No leftover, so the attribute guard never applies. |
| 3665 | if (name.length <= MAX_ENTITY_NAME_LEN) { |
| 3666 | const full = HTML_ENTITIES[name]; |
| 3667 | if (full !== undefined) return full; |
| 3668 | } |
| 3669 | |
| 3670 | // Cap the longest-prefix search at MAX_ENTITY_NAME_LEN so pathological |
| 3671 | // inputs like `&` + thousands of alphanumerics stay linear-time. |
| 3672 | // Anything past that cap can't possibly match and is appended |
| 3673 | // verbatim as part of `name.slice(i)`. The full-length case was just |
| 3674 | // handled above, so start one shorter when it's the cap. |
| 3675 | const searchLen = |
| 3676 | name.length > MAX_ENTITY_NAME_LEN ? MAX_ENTITY_NAME_LEN : name.length - 1; |
| 3677 | for (let i = searchLen; i > 0; i--) { |
| 3678 | const prefix = name.slice(0, i); |
| 3679 | if (HTML_ENTITIES[prefix] !== undefined) { |
| 3680 | // Attribute-context longest-prefix guard: if the matched |
| 3681 | // prefix doesn't end with `;` and the leftover starts with |
| 3682 | // an alphanumeric character, leave literal per WHATWG. |
| 3683 | // (The regex greedy-consumes alphanumerics, so any leftover |
no test coverage detected