Method test_process_links

tests/test_spider_crawl.py:51–72 · view source on GitHub ↗

(self)

Source from the content-addressed store, hash-verified

49	]
50
def test_process_links(self):
    """Check a rule whose ``process_links`` hook is given by method name.

    The spider defined here registers a single rule that names
    ``dummy_process_links`` as its ``process_links`` hook. That hook hands
    back the links it receives unchanged, so the test expects the three
    URLs listed below to come out of ``_requests_to_follow`` as
    ``Request`` objects, in that order.
    """
    page = HtmlResponse(
        "http://example.org/somepage/index.html", body=self.test_body
    )

    class _CrawlSpider(self.spider_class):
        name = "test"
        allowed_domains = ["example.org"]
        # The hook is referenced by its attribute name, not as a callable.
        rules = (Rule(LinkExtractor(), process_links="dummy_process_links"),)

        def dummy_process_links(self, links):
            # Pass-through hook: returns the links untouched.
            return links

    expected_urls = [
        "http://example.org/somepage/item/12.html",
        "http://example.org/about.html",
        "http://example.org/nofollow.html",
    ]

    crawler_spider = _CrawlSpider()
    followed = list(crawler_spider._requests_to_follow(page))

    assert len(followed) == 3
    assert all(isinstance(req, Request) for req in followed)
    assert [req.url for req in followed] == expected_urls
73
74	def test_process_links_filter(self):
75	response = HtmlResponse(

nothing calls this directly

HtmlResponse (Class) · 0.90

_requests_to_follow (Method) · 0.80

_CrawlSpider (Class) · 0.70

no test coverage detected