Raises ------ ValueError * If a URL that lxml cannot parse is passed. Exception * Any other ``Exception`` thrown. For example, trying to parse a URL that is syntactically correct on a machine with no internet connection will fail.
(self)
| 762 | return obj.tag == tag |
| 763 | |
def _build_doc(self):
    """
    Parse ``self.io`` with lxml's HTML parser and return the parsed document.

    A URL is opened through ``get_handle`` (honouring
    ``self.storage_options``) and the resulting handle is parsed; any other
    input is passed to ``lxml.html.parse`` unchanged. The parser is created
    with ``recover=True`` and ``encoding=self.encoding``.

    Returns
    -------
    object
        The value of ``getroot()`` on the parse result, or the parse result
        itself when it has no ``getroot`` attribute. Every ``<br>`` element
        matched by ``*//br`` has a newline prepended to its ``tail``.

    Raises
    ------
    ValueError
        * If a URL that lxml cannot parse is passed.

    FileNotFoundError
        * If parsing a non-URL input raises ``OSError``; the original
          error is chained as the cause.

    XMLSyntaxError
        * If the document root has no ``text_content`` attribute.

    Exception
        * Any other ``Exception`` thrown. For example, trying to parse a
          URL that is syntactically correct on a machine with no internet
          connection will fail.

    See Also
    --------
    pandas.io.html._HtmlFrameParser._build_doc
    """
    from lxml.etree import XMLSyntaxError
    from lxml.html import (
        HTMLParser,
        parse,
    )

    html_parser = HTMLParser(recover=True, encoding=self.encoding)

    if not is_url(self.io):
        # Not a URL: let lxml try to read the input directly.
        try:
            document = parse(self.io, parser=html_parser)
        except OSError as err:
            message = f"[Errno {errno.ENOENT}] {os.strerror(errno.ENOENT)}: {self.io}"
            raise FileNotFoundError(message) from err
    else:
        with get_handle(self.io, "r", storage_options=self.storage_options) as handles:
            document = parse(handles.handle, parser=html_parser)

    try:
        root = document.getroot()
    except AttributeError:
        # The parse result has no ``getroot``; use it unchanged.
        root = document
    else:
        if not hasattr(root, "text_content"):
            raise XMLSyntaxError("no text parsed from document", 0, 0, 0)

    # Prefix the text following each <br> with a newline
    # (presumably so line breaks survive later text extraction — confirm).
    for line_break in root.xpath("*//br"):
        trailing_text = line_break.tail or ""
        line_break.tail = "\n" + trailing_text

    return root
| 811 | |
| 812 | def _parse_thead_tr(self, table): |
| 813 | rows = [] |
nothing calls this directly
no test coverage detected