(self)
| 61 | ] |
| 62 | |
def test_http_header_encoding_precedence(self):
    # The pound sign is U+00A3: the single byte '\xa3' in latin-1
    # (iso-8859-1), but the two bytes '\xc2\xa3' in utf-8.
    #
    # The document below declares iso-8859-1 in its <meta> tag, yet it is
    # encoded as utf-8 and served with a utf-8 Content-Type header. The
    # final assertion only holds if the body is decoded as utf-8.
    pound_span = '<span id="blank">\xa3</span>'
    meta_tag = (
        '<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">'
    )
    document = (
        "<html>"
        f"<head>{meta_tag}</head>"
        f"<body>{pound_span}</body>"
        "</html>"
    )

    response = HtmlResponse(
        url="http://example.com",
        headers={"Content-Type": ["text/html; charset=utf-8"]},
        body=document.encode("utf-8"),
    )

    # A latin-1 decode would yield two characters here instead of one.
    texts = Selector(response).xpath("//span[@id='blank']/text()").getall()
    assert texts == ["\xa3"]
| 84 | |
| 85 | def test_badly_encoded_body(self): |
| 86 | # \xe9 alone isn't valid utf8 sequence |
nothing calls this directly
no test coverage detected