(self)
| 68 | assert nodenames == [["matchme..."]] |
| 69 | |
def test_xmliter_unicode(self):
    """Check that ``xmliter`` handles non-ASCII element names.

    The same document is supplied twice, once as UTF-8 encoded bytes and
    once as ``str`` with an explicit encoding; each response must produce
    the same (id, short abbreviation, first session) rows.
    """
    # example taken from https://github.com/scrapy/scrapy/issues/1665
    body = """<?xml version="1.0" encoding="UTF-8"?>
<þingflokkar>
<þingflokkur id="26">
<heiti />
<skammstafanir>
<stuttskammstöfun>-</stuttskammstöfun>
<löngskammstöfun />
</skammstafanir>
<tímabil>
<fyrstaþing>80</fyrstaþing>
</tímabil>
</þingflokkur>
<þingflokkur id="21">
<heiti>Alþýðubandalag</heiti>
<skammstafanir>
<stuttskammstöfun>Ab</stuttskammstöfun>
<löngskammstöfun>Alþb.</löngskammstöfun>
</skammstafanir>
<tímabil>
<fyrstaþing>76</fyrstaþing>
<síðastaþing>123</síðastaþing>
</tímabil>
</þingflokkur>
<þingflokkur id="27">
<heiti>Alþýðuflokkur</heiti>
<skammstafanir>
<stuttskammstöfun>A</stuttskammstöfun>
<löngskammstöfun>Alþfl.</löngskammstöfun>
</skammstafanir>
<tímabil>
<fyrstaþing>27</fyrstaþing>
<síðastaþing>120</síðastaþing>
</tímabil>
</þingflokkur>
</þingflokkar>"""

    responses = (
        # body given as raw bytes
        XmlResponse(url="http://example.com", body=body.encode("utf-8")),
        # a str body has to be accompanied by encoding information
        XmlResponse(url="http://example.com", body=body, encoding="utf-8"),
    )
    expected_rows = [
        ("26", ["-"], ["80"]),
        ("21", ["Ab"], ["76"]),
        ("27", ["A"], ["27"]),
    ]

    for response in responses:
        rows = []
        for node in self.xmliter(response, "þingflokkur"):
            # Read the attribute first, then run the two XPath queries,
            # so a malformed node fails at the same point every time.
            node_id = node.attrib["id"]
            short_names = node.xpath("./skammstafanir/stuttskammstöfun/text()").getall()
            first_sessions = node.xpath("./tímabil/fyrstaþing/text()").getall()
            rows.append((node_id, short_names, first_sessions))

        assert rows == expected_rows
nothing calls this directly
no test coverage detected