Method _link_allowed

scrapy/linkextractors/lxmlhtml.py:217–235 · view source on GitHub ↗

(self, link: Link)

Source from the content-addressed store, hash-verified

215	]
216
def _link_allowed(self, link: Link) -> bool:
    """Decide whether ``link`` passes every filter configured on this extractor.

    The checks run in a fixed order and stop at the first rejection:

    1. ``_is_valid_url(link.url)`` must be truthy.
    2. If ``allow_res`` is set, ``link.url`` must match it.
    3. If ``deny_res`` is set, ``link.url`` must not match it.
    4. If ``allow_domains`` is set, the parsed URL must be from one of them.
    5. If ``deny_domains`` is set, the parsed URL must not be from any of them.
    6. If ``deny_extensions`` is set, the parsed URL must not have any of them.
    7. If ``restrict_text`` is set, ``link.text`` must match it.

    Each filter is skipped when its attribute is falsy.

    :param link: the candidate link; its ``url`` and ``text`` are inspected.
    :return: ``True`` when the link survives all filters, ``False`` otherwise.
    """
    target = link.url

    if not _is_valid_url(target):
        return False

    # Regex-based filters operate on the raw URL string, before any parsing.
    if self.allow_res:
        if not _matches(target, self.allow_res):
            return False
    if self.deny_res:
        if _matches(target, self.deny_res):
            return False

    # Parse once; the result is shared by the domain and extension checks.
    parts = urlparse(target)

    if self.allow_domains:
        if not url_is_from_any_domain(parts, self.allow_domains):
            return False
    if self.deny_domains:
        if url_is_from_any_domain(parts, self.deny_domains):
            return False
    if self.deny_extensions:
        if url_has_any_extension(parts, self.deny_extensions):
            return False

    # The text restriction is the last gate; with none configured the link passes.
    if self.restrict_text:
        return _matches(link.text, self.restrict_text)
    return True
236
237	def matches(self, url: str) -> bool:
238	if self.allow_domains and not url_is_from_any_domain(url, self.allow_domains):

_process_linksMethod · 0.95

test_link_allowed_is_false_with_empty_urlMethod · 0.80

test_link_allowed_is_false_with_bad_url_prefixMethod · 0.80

test_link_allowed_is_false_with_missing_url_prefixMethod · 0.80

_is_valid_urlFunction · 0.90

_matchesFunction · 0.90

url_is_from_any_domainFunction · 0.90

url_has_any_extensionFunction · 0.90

test_link_allowed_is_false_with_empty_urlMethod · 0.64

test_link_allowed_is_false_with_bad_url_prefixMethod · 0.64

test_link_allowed_is_false_with_missing_url_prefixMethod · 0.64