MCPcopy
hub / github.com/scrapy/scrapy / xmliter_lxml

Function xmliter_lxml

scrapy/utils/iterators.py:81–121  ·  view source on GitHub ↗
(
    obj: Response | str | bytes,
    nodename: str,
    namespace: str | None = None,
    prefix: str = "x",
)

Source from the content-addressed store, hash-verified

79
80
def xmliter_lxml(
    obj: Response | str | bytes,
    nodename: str,
    namespace: str | None = None,
    prefix: str = "x",
) -> Iterator[Selector]:
    """Incrementally parse XML and yield one selector per matching element.

    The input is fed to ``lxml.etree.iterparse`` through ``_StreamReader``.
    Every element whose tag equals the requested one is serialised,
    re-parsed on its own as an XML ``Selector`` and yielded.

    :param obj: the XML input, handed unchanged to ``_StreamReader``.
    :param nodename: name of the elements to yield. When *namespace* is
        falsy and the name contains ``":"``, the text before the first colon
        is treated as a namespace prefix and is resolved against the
        namespace declarations reported by the parser.
    :param namespace: namespace URI of the elements; when given, the tag to
        match is ``{namespace}nodename``.
    :param prefix: prefix under which the namespace is registered on each
        yielded selector and used in the XPath that selects the element.
    :return: an iterator of selectors, one per matching element, in the
        order their end tags are encountered.
    """
    reader = _StreamReader(obj)

    # Initial match tag and XPath, built from the arguments as given (i.e.
    # before any "prefix:name" split performed further down).
    if namespace:
        tag = f"{{{namespace}}}{nodename}"
        selxpath = f"//{prefix}:{nodename}"
    else:
        tag = nodename
        selxpath = f"//{nodename}"

    parse_events = etree.iterparse(
        reader,
        encoding=reader.encoding,
        # "start-ns" events expose (prefix, uri) declarations; "end" events
        # deliver elements once they have been parsed completely.
        events=("end", "start-ns"),
        resolve_entities=False,
        huge_tree=True,
    )

    # A "prefix:name" nodename without an explicit namespace has to wait for
    # the document to declare that prefix before the real tag is known.
    awaiting_prefix_declaration = not namespace and ":" in nodename
    if awaiting_prefix_declaration:
        prefix, nodename = nodename.split(":", maxsplit=1)

    for event, data in parse_events:
        if event == "start-ns":
            assert isinstance(data, tuple)
            if awaiting_prefix_declaration:
                declared_prefix, declared_uri = data
                if declared_prefix == prefix:
                    # First declaration of the wanted prefix: lock in the
                    # namespace and rebuild the tag / XPath around it.
                    namespace = declared_uri
                    awaiting_prefix_declaration = False
                    selxpath = f"//{prefix}:{nodename}"
                    tag = f"{{{namespace}}}{nodename}"
            continue

        assert isinstance(data, etree._Element)
        if data.tag != tag:
            continue

        # Serialise first, then empty the element so its content is dropped
        # from the partially built tree.
        serialized = etree.tostring(data, encoding="unicode")
        data.clear()

        selector = Selector(text=serialized, type="xml")
        if namespace:
            selector.register_namespace(prefix, namespace)
        yield selector.xpath(selxpath)[0]
122
123
124class _StreamReader:

Callers 2

_iternodesMethod · 0.90
xmliterMethod · 0.90

Calls 4

SelectorClass · 0.90
_StreamReaderClass · 0.85
clearMethod · 0.80
xpathMethod · 0.45

Tested by 1

xmliterMethod · 0.72