MCPcopy
hub / github.com/scrapy/scrapy / test_process_links

Method test_process_links

tests/test_spider_crawl.py:51–72  ·  view source on GitHub ↗
(self)

Source from the content-addressed store, hash-verified

49 ]
50
def test_process_links(self):
    """Follow links through a rule whose ``process_links`` is a method name.

    The spider's single rule names ``dummy_process_links`` as a string;
    that method hands back the links it was given unchanged. The test
    expects exactly three ``Request`` objects, with the URLs below in
    this order.
    """
    page = HtmlResponse(
        "http://example.org/somepage/index.html", body=self.test_body
    )

    class _CrawlSpider(self.spider_class):
        name = "test"
        allowed_domains = ["example.org"]
        # The hook is given by name (a string), not as a callable.
        rules = (Rule(LinkExtractor(), process_links="dummy_process_links"),)

        def dummy_process_links(self, links):
            # Pass-through: hand the links back untouched.
            return links

    expected_urls = [
        "http://example.org/somepage/item/12.html",
        "http://example.org/about.html",
        "http://example.org/nofollow.html",
    ]

    crawler_spider = _CrawlSpider()
    followed = list(crawler_spider._requests_to_follow(page))

    assert len(followed) == 3
    assert all(isinstance(req, Request) for req in followed)
    assert [req.url for req in followed] == expected_urls
73
74 def test_process_links_filter(self):
75 response = HtmlResponse(

Callers

nothing calls this directly

Calls 3

HtmlResponse (Class) · 0.90
_requests_to_follow (Method) · 0.80
_CrawlSpider (Class) · 0.70

Tested by

no test coverage detected