| 202 | |
| 203 | |
class Parser(HTMLParser):
    """HTML parser that assembles the parsed markup into a tree under ``root``.

    Attributes:
        root: The ``RootElement`` that receives top-level children.
        open_tags: Stack of elements whose end tag has not been handled yet;
            the last entry is the innermost open element.
        element_positions: Maps every element created by ``handle_starttag``
            to the ``getpos()`` value recorded while handling its start tag.
    """

    def __init__(self):
        super().__init__()
        self.root = RootElement()
        self.open_tags = []
        self.element_positions = {}

    def error(self, msg):
        """Raise ``HTMLParseError`` carrying *msg* and the current position."""
        raise HTMLParseError(msg, self.getpos())

    def format_position(self, position=None, element=None):
        """Return a ``"Line L, Column C"`` string describing a position.

        Args:
            position: A ``(line, column)`` pair, or an object exposing
                ``lineno`` and ``offset`` attributes. Takes precedence over
                *element* when truthy.
            element: Key into ``element_positions``; consulted only when
                *position* is falsy.

        With neither argument usable, the parser's current ``getpos()`` value
        is formatted instead.
        """
        if not position:
            if element:
                position = self.element_positions[element]
        if position is None:
            position = self.getpos()
        # Objects with lineno/offset attributes are reduced to a plain pair so
        # that the %-format below always receives two values.
        line_and_column = (
            (position.lineno, position.offset)
            if hasattr(position, "lineno")
            else position
        )
        return "Line %d, Column %d" % line_and_column

    @property
    def current(self):
        """The innermost open element, or ``root`` when no tag is open."""
        return self.open_tags[-1] if self.open_tags else self.root

    def handle_startendtag(self, tag, attrs):
        """Handle a self-closed tag as a start tag followed by an end tag.

        Tags listed in ``VOID_ELEMENTS`` are never pushed on ``open_tags`` by
        ``handle_starttag``, so no end tag is processed for them.
        """
        self.handle_starttag(tag, attrs)
        if tag in VOID_ELEMENTS:
            return
        self.handle_endtag(tag)

    def handle_starttag(self, tag, attrs):
        """Create an element for *tag* and attach it to the current element.

        The new element becomes the current one unless *tag* is listed in
        ``VOID_ELEMENTS``; its position is recorded in either case.
        """
        normalized = normalize_attributes(attrs)
        node = Element(tag, normalized)
        self.current.append(node)
        if tag not in VOID_ELEMENTS:
            self.open_tags.append(node)
        self.element_positions[node] = self.getpos()

    def handle_endtag(self, tag):
        """Close open elements up to and including the innermost one named *tag*.

        Elements popped before the match is found are treated as closed as
        well. ``error()`` is called when the stack runs out without a match.
        """
        while True:
            if not self.open_tags:
                self.error(
                    "Unexpected end tag `%s` (%s)" % (tag, self.format_position())
                )
            if self.open_tags.pop().name == tag:
                return

    def handle_data(self, data):
        """Append the text chunk *data* to the current element."""
        parent = self.current
        parent.append(data)
| 256 | |
| 257 | |
| 258 | def parse_html(html): |