MCPcopy
hub / github.com/scrapy/scrapy / test_process_request

Method test_process_request

tests/test_spider_crawl.py:120–143  ·  view source on GitHub ↗
(self)

Source from the content-addressed store, hash-verified

118 ]
119
def test_process_request(self):
    """Verify that a rule's ``process_request`` hook can rewrite followed requests.

    The hook used here swaps ``.org`` for ``.com`` in every request URL; the
    test then checks that the three requests produced by
    ``_requests_to_follow`` are ``Request`` objects carrying the rewritten
    URLs, in order.
    """
    expected_urls = [
        "http://example.com/somepage/item/12.html",
        "http://example.com/about.html",
        "http://example.com/nofollow.html",
    ]

    page = HtmlResponse(
        "http://example.org/somepage/index.html", body=self.test_body
    )

    def process_request_change_domain(request, response):
        # ``response`` is not used by this hook; only the request URL is rewritten.
        rewritten_url = request.url.replace(".org", ".com")
        return request.replace(url=rewritten_url)

    class _CrawlSpider(self.spider_class):
        name = "test"
        allowed_domains = ["example.org"]
        rules = (
            Rule(LinkExtractor(), process_request=process_request_change_domain),
        )

    followed = list(_CrawlSpider()._requests_to_follow(page))

    assert len(followed) == 3
    for req in followed:
        assert isinstance(req, Request)
    assert [req.url for req in followed] == expected_urls
144
145 def test_process_request_with_response(self):
146 response = HtmlResponse(

Callers

nothing calls this directly

Calls 3

HtmlResponse (Class) · 0.90
_requests_to_follow (Method) · 0.80
_CrawlSpider (Class) · 0.70

Tested by

no test coverage detected