Test the extractor's behaviour for links with rel='nofollow'
(self)
| 150 | ] |
| 151 | |
def test_nofollow(self):
    """Check how rel='nofollow' is reflected on the extracted links.

    The fixture page holds five anchors: two without a ``rel`` attribute,
    one with ``rel="nofollow"``, one with an unrelated ``rel`` value and
    one with ``rel="external nofollow"``.  Only the anchors whose ``rel``
    contains the ``nofollow`` token are expected to compare equal to a
    ``Link`` built with ``nofollow=True``; the others must match a ``Link``
    built without that flag.
    """
    page_body = b"""<html><head><title>Page title</title></head>
<body>
<div class='links'>
<p><a href="/about.html">About us</a></p>
</div>
<div>
<p><a href="/follow.html">Follow this link</a></p>
</div>
<div>
<p><a href="/nofollow.html" rel="nofollow">Dont follow this one</a></p>
</div>
<div>
<p><a href="/nofollow2.html" rel="blah">Choose to follow or not</a></p>
</div>
<div>
<p><a href="http://google.com/something" rel="external nofollow">External link not to follow</a></p>
</div>
</body></html>"""
    page = HtmlResponse("http://example.org/somepage/index.html", body=page_body)

    # Default-configured extractor: no constructor arguments are passed.
    extracted = self.extractor_cls().extract_links(page)

    # The comparison is plain list equality, so the expected links are
    # listed in the same order as the anchors appear in the fixture.
    plain_about = Link(url="http://example.org/about.html", text="About us")
    plain_follow = Link(
        url="http://example.org/follow.html", text="Follow this link"
    )
    flagged_local = Link(
        url="http://example.org/nofollow.html",
        text="Dont follow this one",
        nofollow=True,
    )
    unrelated_rel = Link(
        url="http://example.org/nofollow2.html",
        text="Choose to follow or not",
    )
    flagged_external = Link(
        url="http://google.com/something",
        text="External link not to follow",
        nofollow=True,
    )

    assert extracted == [
        plain_about,
        plain_follow,
        flagged_local,
        unrelated_rel,
        flagged_external,
    ]
| 194 | |
| 195 | def test_matches(self): |
| 196 | url1 = "http://lotsofstuff.com/stuff1/index" |
nothing calls this directly
no test coverage detected