hub / github.com/scrapy/scrapy / _process_sitemap_element

Method _process_sitemap_element

scrapy/utils/sitemap.py:62–96 · view source on GitHub ↗

(
        self, elem: lxml.etree._Element
    )

Source from the content-addressed store, hash-verified

60	yield d
61
def _process_sitemap_element(
    self, elem: lxml.etree._Element
) -> dict[str, Any] | None:
    """Convert one sitemap entry element into a dict, clearing it as it goes.

    Every child of *elem* is named with ``self._get_tag_name``; children
    for which that returns a falsy name are ignored. A child named
    ``link`` contributes its ``href`` attribute (when present and
    non-empty) to the ``"alternate"`` list. Any other child is stored
    under its tag name with its text stripped of surrounding whitespace,
    or ``""`` when it has no text. When a tag name repeats, the last
    occurrence wins.

    The element is consumed by this call: each child is cleared once it
    has been read, then *elem* itself is cleared and the siblings that
    precede it are deleted from its parent.

    Returns:
        The collected mapping (with an ``"alternate"`` key only when at
        least one alternate link was found), or ``None`` when the entry
        has no ``loc`` child.
    """
    entry: dict[str, Any] = {}
    alternate: list[str] = []

    for el in elem:
        try:
            tag_name = self._get_tag_name(el)
            if not tag_name:
                continue

            if tag_name == "link":
                if href := el.get("href"):
                    alternate.append(href)
            else:
                entry[tag_name] = el.text.strip() if el.text else ""
        finally:
            # Runs for skipped children and when naming the child raises,
            # so no child keeps its content after being visited.
            el.clear()

    # Drop what is left of this entry and everything before it in the
    # parent. NOTE(review): presumably this keeps memory flat while the
    # caller parses incrementally -- confirm against the caller.
    elem.clear()
    parent = elem.getparent()
    if parent is not None:
        while elem.getprevious() is not None:
            del parent[0]

    # "loc" is only ever stored by the generic branch above, so key
    # presence is exactly "a loc child was seen" (its text may be empty).
    if "loc" not in entry:
        return None

    if alternate:
        entry["alternate"] = alternate

    return entry
97
98	@staticmethod
99	def _get_tag_name(elem: lxml.etree._Element) -> str:

__iter__ · Method · 0.95

_get_tag_name · Method · 0.95

clear · Method · 0.80

get · Method · 0.45

no test coverage detected