(self)
| 49 | ] |
| 50 | |
def test_process_links(self):
    """Check that a pass-through ``process_links`` hook keeps every link.

    A spider subclass is declared with a single rule whose
    ``process_links`` refers, by name, to a method that returns the
    link list it receives unchanged. Following the rules on a response
    built from ``self.test_body`` must then yield exactly three
    ``Request`` objects, in the URL order asserted below.
    """
    page = HtmlResponse(
        "http://example.org/somepage/index.html", body=self.test_body
    )

    class _CrawlSpider(self.spider_class):
        name = "test"
        allowed_domains = ["example.org"]
        rules = (Rule(LinkExtractor(), process_links="dummy_process_links"),)

        def dummy_process_links(self, links):
            # Identity hook: hand the extracted links back untouched.
            return links

    expected_urls = [
        "http://example.org/somepage/item/12.html",
        "http://example.org/about.html",
        "http://example.org/nofollow.html",
    ]

    crawler_spider = _CrawlSpider()
    followed = list(crawler_spider._requests_to_follow(page))

    assert len(followed) == 3
    assert all(isinstance(req, Request) for req in followed)
    assert [req.url for req in followed] == expected_urls
| 73 | |
| 74 | def test_process_links_filter(self): |
| 75 | response = HtmlResponse( |
nothing calls this directly
no test coverage detected