(input, pos = 0, callbacks = {})
| 375 | * @returns {number} final position |
| 376 | */ |
| 377 | const walkHtmlTokens = (input, pos = 0, callbacks = {}) => { |
| 378 | const len = input.length; |
| 379 | let state = STATE_DATA; |
| 380 | let returnState = STATE_DATA; |
| 381 | |
| 382 | let textStart = pos; |
| 383 | let tagStart = pos; |
| 384 | let tagNameStart = -1; |
| 385 | let tagNameEnd = -1; |
| 386 | let attrNameStart = -1; |
| 387 | let attrNameEnd = -1; |
| 388 | let attrValueStart = -1; |
| 389 | let attrQuoteType = QUOTE_NONE; |
| 390 | let commentStart = pos; |
| 391 | let lastOpenTagName = ""; |
| 392 | // Tag-name offsets of the last open tag; the lowercased `lastOpenTagName` is |
| 393 | // derived from these lazily (only for special-content tags). |
| 394 | let lastOpenTagStart = -1; |
| 395 | let lastOpenTagEnd = -1; |
| 396 | // Counter used by SCRIPT_DATA_DOUBLE_ESCAPE_{START,END} to detect whether |
| 397 | // the ASCII-alpha run after `<` / `</` spells exactly `"script"`. Values |
| 398 | // 0..6 = number of chars matched so far; 7 = no longer matches (sentinel). |
| 399 | // Avoids growing a buffer for pathological inputs with long alpha runs. |
| 400 | let scriptMatch = 0; |
| 401 | let namedEntityConsumed = 0; |
| 402 | // Offset of the opening `&` and the running numeric value (clamped past the |
| 403 | // Unicode range so it can't overflow); used for numeric-reference errors. |
| 404 | let charRefStart = -1; |
| 405 | let charRefCode = 0; |
| 406 | // Tracks whether any attributes have been parsed for the current tag — |
| 407 | // used to fire the `end-tag-with-attributes` parse error when an end tag is emitted. |
| 408 | let tagHasAttributes = false; |
| 409 | |
| 410 | /** |
| 411 | * Reports a tokenizer parse error to the consumer. The offset range and |
| 412 | * severity follow the WHATWG spec naming. Severity is `"error"` for |
| 413 | * cases where the emitted token is incomplete (EOF inside a tag or |
| 414 | * comment); everything else is a `"warning"`. Offsets are JS string |
| 415 | * indices (UTF-16 code-unit offsets into `input`). |
| 416 | * @param {string} code WHATWG parse-error code (kebab-case) |
| 417 | * @param {number} start string offset where the error starts |
| 418 | * @param {number} end string offset where the error ends |
| 419 | * @param {ParseErrorSeverity} severity error severity |
| 420 | */ |
| 421 | const reportError = (code, start, end, severity) => { |
| 422 | if (callbacks.parseError !== undefined) { /* the parseError callback is optional; when it is absent the error is silently dropped */ |
| 423 | callbacks.parseError(input, code, start, end, severity); /* forwards the full `input` string first, then the error code, offset range and severity */ |
| 424 | } |
| 425 | }; |
| 426 | |
| 427 | /** |
| 428 | * Emits the WHATWG numeric-character-reference validation parse error for |
| 429 | * the accumulated `charRefCode`, if any. Used both inline (when the |
| 430 | * reference is terminated by a real next character) and at EOF (when the |
| 431 | * reference runs to the end of input). The scanner only flags the error — |
| 432 | * the spec's U+FFFD / Windows-1252 substitution is done by `decodeHtmlEntities`. |
| 433 | * @param {number} endPos offset just past the reference |
| 434 | */ |
no test coverage detected