MCPcopy
hub / github.com/scrapy/scrapy / test_processed_both

Function test_processed_both

tests/test_spidermiddleware_base.py:90–130  ·  view source on GitHub ↗
(crawler: Crawler)

Source from the content-addressed store, hash-verified

88
@coroutine_test
async def test_processed_both(crawler: Crawler) -> None:
    """Check a middleware that overrides both the request and the item hook.

    The same logical input is pushed through the synchronous
    ``process_spider_output`` path and the asynchronous ``process_start``
    path. For each path the test expects that:

    * entries for which a hook returns ``None`` are absent from the output,
    * entries for which a hook returns a replacement/modified object appear
      in their rewritten form,
    * all other entries pass through unchanged and in order.
    """

    class ProcessBothSpiderMiddleware(BaseSpiderMiddleware):
        def get_processed_request(
            self, request: Request, response: Response | None
        ) -> Request | None:
            if request.url == "data:2,":
                return None
            if request.url == "data:3,":
                return Request("data:30,")
            return request

        def get_processed_item(self, item: Any, response: Response | None) -> Any:
            if item["foo"] == 2:
                return None
            if item["foo"] == 3:
                # In-place mutation: the caller's dict object is modified.
                item["foo"] = 30
            return item

    def make_spider_output() -> list[Any]:
        # A fresh list per pass is required because get_processed_item mutates
        # the {"foo": 3} dict in place. Sharing one list between both passes
        # would hand the second pass an already-rewritten {"foo": 30}, so its
        # rewrite branch would never actually run.
        return [
            Request("data:1,"),
            {"foo": 1},
            {"foo": 2},
            Request("data:2,"),
            {"foo": 3},
            Request("data:3,"),
        ]

    mw = ProcessBothSpiderMiddleware.from_crawler(crawler)

    sync_processed = list(
        mw.process_spider_output(Response("data:,"), make_spider_output())
    )
    async_processed = await collect_asyncgen(
        mw.process_start(as_async_generator(make_spider_output()))
    )

    for processed in (sync_processed, async_processed):
        assert len(processed) == 4
        assert isinstance(processed[0], Request)
        assert processed[0].url == "data:1,"
        assert processed[1] == {"foo": 1}
        assert processed[2] == {"foo": 30}
        assert isinstance(processed[3], Request)
        assert processed[3].url == "data:30,"

Callers

nothing calls this directly

Calls 7

Request (Class) · 0.90
Response (Class) · 0.90
collect_asyncgen (Function) · 0.90
as_async_generator (Function) · 0.90
from_crawler (Method) · 0.45
process_spider_output (Method) · 0.45
process_start (Method) · 0.45

Tested by

no test coverage detected