Method _requests_to_follow

scrapy/spiders/crawl.py:139–152 · view source on GitHub ↗

(self, response: Response)

Source from the content-addressed store, hash-verified

137	)
138
def _requests_to_follow(self, response: Response) -> Iterable[Request | None]:
    """Yield the follow-up requests produced by the spider's rules.

    Every rule in ``self._rules`` extracts links from *response* with its
    link extractor, passes them through the rule's ``process_links``
    callable, builds a request for each resulting link via
    ``self._build_request`` and finally yields whatever the rule's
    ``process_request`` callable returns for that request.

    :param response: the response to extract links from; anything that is
        not an ``HtmlResponse`` produces no requests at all.
    :return: an iterable of the values returned by ``process_request``
        (annotated as ``Request | None``).
    """
    if not isinstance(response, HtmlResponse):
        return

    # Links already turned into a request by an earlier rule; shared across
    # rules so the same link is not followed twice for one response.
    seen: set[Link] = set()
    for rule_index, rule in enumerate(self._rules):
        # Drop links handled by a previous rule *before* handing the list to
        # process_links, so that hook only sees new candidates.
        links: list[Link] = [
            lnk
            for lnk in rule.link_extractor.extract_links(response)
            if lnk not in seen
        ]
        for link in cast("ProcessLinksT", rule.process_links)(links):
            # Only links that come back from process_links are recorded, so a
            # link filtered out here stays available to later rules.
            seen.add(link)
            request = self._build_request(rule_index, link)
            yield cast("ProcessRequestT", rule.process_request)(request, response)
153
154	def _callback(self, response: Response, **cb_kwargs: Any) -> Any:
155	rule = self._rules[cast("int", response.meta["rule"])]

parse_with_rulesMethod · 0.95

test_rule_without_link_extractorMethod · 0.80

test_process_linksMethod · 0.80

test_process_links_filterMethod · 0.80

test_process_links_generatorMethod · 0.80

test_process_requestMethod · 0.80

test_process_request_with_responseMethod · 0.80

test_process_request_instance_methodMethod · 0.80

test_process_request_instance_method_with_responseMethod · 0.80

_build_requestMethod · 0.95

extract_linksMethod · 0.45

test_rule_without_link_extractorMethod · 0.64

test_process_linksMethod · 0.64

test_process_links_filterMethod · 0.64

test_process_links_generatorMethod · 0.64

test_process_requestMethod · 0.64

test_process_request_with_responseMethod · 0.64

test_process_request_instance_methodMethod · 0.64

test_process_request_instance_method_with_responseMethod · 0.64