(self)
| 29 | spider_class = CrawlSpider |
| 30 | |
def test_rule_without_link_extractor(self):
    """Check the requests produced by a ``Rule`` built with no arguments.

    A spider whose only rule is a bare ``Rule()`` is asked for the
    requests to follow from a response wrapping ``self.test_body``; the
    result must be exactly three ``Request`` objects with the URLs
    listed below, in that order.
    """

    class _CrawlSpider(self.spider_class):
        name = "test"
        allowed_domains = ["example.org"]
        rules = (Rule(),)

    page = HtmlResponse(
        "http://example.org/somepage/index.html", body=self.test_body
    )
    expected_urls = [
        "http://example.org/somepage/item/12.html",
        "http://example.org/about.html",
        "http://example.org/nofollow.html",
    ]

    followed = list(_CrawlSpider()._requests_to_follow(page))

    # Same three checks as before: count, element type, then exact URLs.
    assert len(followed) == 3
    assert all(isinstance(req, Request) for req in followed)
    assert [req.url for req in followed] == expected_urls
| 50 | |
| 51 | def test_process_links(self): |
| 52 | response = HtmlResponse( |
nothing calls this directly
no test coverage detected