MCPcopy
hub / github.com/scrapy/scrapy / TestSitemapSpider

Class TestSitemapSpider

tests/test_spider_sitemap.py:22–440  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

20
21
22class TestSitemapSpider(TestSpider):
23 spider_class = SitemapSpider
24
# Shared fixtures: a plain payload and the same payload gzip-compressed.
BODY = b"SITEMAP"
f = BytesIO()
# Use the GzipFile as a context manager so it is closed even if the write
# raises; the in-memory buffer ``f`` stays readable afterwards.
with gzip.GzipFile(fileobj=f, mode="w+b") as g:
    g.write(BODY)
GZBODY = f.getvalue()
31
def assertSitemapBody(self, response: Response, body: bytes | None) -> None:
    """Assert that a fresh spider extracts ``body`` from ``response``.

    A new spider named "example.com" is built from ``spider_class`` for each
    call, and the result of its ``_get_sitemap_body`` is compared to ``body``.
    """
    spider = self.spider_class.from_crawler(get_crawler(), "example.com")
    extracted = spider._get_sitemap_body(response)
    assert extracted == body
36
def test_get_sitemap_body(self):
    """Check the extracted sitemap body for several response types."""
    # (response class, url, response body, expected sitemap body)
    cases = (
        (XmlResponse, "http://www.example.com/", self.BODY, self.BODY),
        (HtmlResponse, "http://www.example.com/", self.BODY, None),
        (Response, "http://www.example.com/favicon.ico", self.BODY, None),
        (XmlResponse, "http://www.example.com/", b"", b""),
    )
    # Build each response right before checking it, in the listed order.
    for response_cls, url, payload, expected in cases:
        self.assertSitemapBody(response_cls(url=url, body=payload), expected)
49
def test_get_sitemap_body_gzip_headers(self):
    """A gzipped body served as ``application/gzip`` yields the plain body."""
    sitemap_url = "http://www.example.com/sitemap"
    gzipped = Response(
        url=sitemap_url,
        body=self.GZBODY,
        headers={"content-type": "application/gzip"},
        request=Request(sitemap_url),
    )
    self.assertSitemapBody(gzipped, self.BODY)
58
def test_get_sitemap_body_xml_url(self):
    """A ``TextResponse`` whose URL ends in ``.xml`` yields its body."""
    self.assertSitemapBody(
        TextResponse(url="http://www.example.com/sitemap.xml", body=self.BODY),
        self.BODY,
    )
62
def test_get_sitemap_body_xml_url_compressed(self):
    """``.xml.gz`` URLs yield the plain body for gzipped and plain payloads."""
    gz_url = "http://www.example.com/sitemap.xml.gz"

    still_compressed = Response(
        url=gz_url,
        body=self.GZBODY,
        request=Request("http://www.example.com/sitemap"),
    )
    self.assertSitemapBody(still_compressed, self.BODY)

    # .xml.gz but body decoded by HttpCompression middleware already
    already_decoded = Response(url=gz_url, body=self.BODY)
    self.assertSitemapBody(already_decoded, self.BODY)
74
75 def test_get_sitemap_urls_from_robotstxt(self):
76 robots = b"""# Sitemap files
77Sitemap: http://example.com/sitemap.xml
78Sitemap: http://example.com/sitemap-product-index.xml
79Sitemap: HTTP://example.com/sitemap-uppercase.xml

Callers

nothing calls this directly

Calls 2

write (method) · 0.45
close (method) · 0.45

Tested by

no test coverage detected