MCPcopy
hub / github.com/scrapy/scrapy / _process_sitemap_element

Method _process_sitemap_element

scrapy/utils/sitemap.py:62–96  ·  view source on GitHub ↗
(
        self, elem: lxml.etree._Element
    )

Source from the content-addressed store, hash-verified

60 yield d
61
def _process_sitemap_element(
    self, elem: lxml.etree._Element
) -> dict[str, Any] | None:
    """Convert one sitemap entry element into a plain dict.

    Each direct child of *elem* is looked up via ``self._get_tag_name``:

    * children whose tag name is empty are ignored;
    * ``link`` children contribute their non-empty ``href`` attribute to
      a list stored under the ``"alternate"`` key (only when at least one
      was found);
    * every other child is stored as ``{tag_name: stripped text}``, with
      ``""`` when the child has no text. A later child with the same tag
      name overwrites an earlier one.

    As a side effect every child, and then *elem* itself, is cleared, and
    the siblings that precede *elem* are deleted from its parent.
    NOTE(review): presumably this keeps memory bounded while the document
    is parsed incrementally -- confirm against the caller.

    Returns the collected dict, or ``None`` when no ``loc`` child was seen.
    """
    fields: dict[str, Any] = {}
    alternates: list[str] = []
    loc_seen = False

    for child in elem:
        try:
            name = self._get_tag_name(child)
            if name == "link":
                href = child.get("href")
                if href:
                    alternates.append(href)
            elif name:
                text = child.text
                fields[name] = text.strip() if text else ""
                loc_seen = loc_seen or name == "loc"
        finally:
            # Always drop the child's content, even if processing raised.
            child.clear()

    elem.clear()
    container = elem.getparent()
    if container is not None:
        # Remove already-handled siblings from the front of the parent
        # until *elem* is its first child.
        while elem.getprevious() is not None:
            del container[0]

    if not loc_seen:
        return None

    if alternates:
        fields["alternate"] = alternates
    return fields
97
98 @staticmethod
99 def _get_tag_name(elem: lxml.etree._Element) -> str:

Callers 1

__iter__ · Method · 0.95

Calls 3

_get_tag_name · Method · 0.95
clear · Method · 0.80
get · Method · 0.45

Tested by

no test coverage detected