(self)
| 654 | return raw_text |
| 655 | |
def _build_doc(self):
    """
    Parse the raw document into a BeautifulSoup tree.

    The document is obtained from ``self._setup_build_doc()``. If it is
    ``bytes`` and ``self.encoding`` is set, it is decoded here and no
    encoding is passed to the parser; otherwise it is handed over unchanged
    together with ``self.encoding``.

    Returns
    -------
    BeautifulSoup
        The tree built with ``features="html5lib"``, in which every ``<br>``
        tag has been replaced by a newline followed by that tag's text.
    """
    from bs4 import BeautifulSoup

    raw = self._setup_build_doc()

    # Decode ourselves only when we hold bytes *and* know the encoding.
    decode_here = isinstance(raw, bytes) and self.encoding is not None
    markup = raw.decode(self.encoding) if decode_here else raw
    encoding_hint = None if decode_here else self.encoding

    tree = BeautifulSoup(markup, features="html5lib", from_encoding=encoding_hint)

    # Swap each <br> for a newline so the break is kept as plain text.
    for line_break in tree.find_all("br"):
        line_break.replace_with("\n" + line_break.text)

    return tree
| 673 | |
| 674 | |
| 675 | def _build_xpath_expr(attrs) -> str: |
nothing calls this directly
no test coverage detected