MCPcopy
hub / github.com/scrapy/scrapy / test_xmliter_unicode

Method test_xmliter_unicode

tests/test_utils_iterators.py:70–127  ·  view source on GitHub ↗
(self)

Source from the content-addressed store, hash-verified

68 assert nodenames == [["matchme..."]]
69
def test_xmliter_unicode(self):
    """Check that ``xmliter`` copes with non-ASCII element names.

    One XML document whose tags contain Icelandic characters is wrapped in
    two ``XmlResponse`` objects: one built from UTF-8 bytes, one built from
    ``str`` with an explicit encoding. Iterating ``þingflokkur`` nodes must
    produce the same ids and text values for both.
    """
    # example taken from https://github.com/scrapy/scrapy/issues/1665
    body = """<?xml version="1.0" encoding="UTF-8"?>
<þingflokkar>
<þingflokkur id="26">
<heiti />
<skammstafanir>
<stuttskammstöfun>-</stuttskammstöfun>
<löngskammstöfun />
</skammstafanir>
<tímabil>
<fyrstaþing>80</fyrstaþing>
</tímabil>
</þingflokkur>
<þingflokkur id="21">
<heiti>Alþýðubandalag</heiti>
<skammstafanir>
<stuttskammstöfun>Ab</stuttskammstöfun>
<löngskammstöfun>Alþb.</löngskammstöfun>
</skammstafanir>
<tímabil>
<fyrstaþing>76</fyrstaþing>
<síðastaþing>123</síðastaþing>
</tímabil>
</þingflokkur>
<þingflokkur id="27">
<heiti>Alþýðuflokkur</heiti>
<skammstafanir>
<stuttskammstöfun>A</stuttskammstöfun>
<löngskammstöfun>Alþfl.</löngskammstöfun>
</skammstafanir>
<tímabil>
<fyrstaþing>27</fyrstaþing>
<síðastaþing>120</síðastaþing>
</tímabil>
</þingflokkur>
</þingflokkar>"""

    url = "http://example.com"
    responses = (
        # with bytes
        XmlResponse(url=url, body=body.encode("utf-8")),
        # Unicode body needs encoding information
        XmlResponse(url=url, body=body, encoding="utf-8"),
    )

    # (id attribute, short-abbreviation texts, first-session texts) per node,
    # in document order.
    expected = [
        ("26", ["-"], ["80"]),
        ("21", ["Ab"], ["76"]),
        ("27", ["A"], ["27"]),
    ]

    for response in responses:
        found = []
        for node in self.xmliter(response, "þingflokkur"):
            node_id = node.attrib["id"]
            short_names = node.xpath(
                "./skammstafanir/stuttskammstöfun/text()"
            ).getall()
            first_sessions = node.xpath("./tímabil/fyrstaþing/text()").getall()
            found.append((node_id, short_names, first_sessions))

        assert found == expected

Callers

nothing calls this directly

Calls 3

xmliter (Method) · 0.95
XmlResponse (Class) · 0.90
xpath (Method) · 0.45

Tested by

no test coverage detected