(self)
| 500 | ] |
| 501 | |
| 502 | def test_xhtml(self): |
| 503 | xhtml = b""" |
| 504 | <?xml version="1.0"?> |
| 505 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |
| 506 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> |
| 507 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> |
| 508 | <head> |
| 509 | <title>XHTML document title</title> |
| 510 | </head> |
| 511 | <body> |
| 512 | <div class='links'> |
| 513 | <p><a href="/about.html">About us</a></p> |
| 514 | </div> |
| 515 | <div> |
| 516 | <p><a href="/follow.html">Follow this link</a></p> |
| 517 | </div> |
| 518 | <div> |
| 519 | <p><a href="/nofollow.html" rel="nofollow">Dont follow this one</a></p> |
| 520 | </div> |
| 521 | <div> |
| 522 | <p><a href="/nofollow2.html" rel="blah">Choose to follow or not</a></p> |
| 523 | </div> |
| 524 | <div> |
| 525 | <p><a href="http://google.com/something" rel="external nofollow">External link not to follow</a></p> |
| 526 | </div> |
| 527 | </body> |
| 528 | </html> |
| 529 | """ |
| 530 | |
| 531 | response = HtmlResponse("http://example.com/index.xhtml", body=xhtml) |
| 532 | |
| 533 | lx = self.extractor_cls() |
| 534 | assert lx.extract_links(response) == [ |
| 535 | Link( |
| 536 | url="http://example.com/about.html", |
| 537 | text="About us", |
| 538 | fragment="", |
| 539 | nofollow=False, |
| 540 | ), |
| 541 | Link( |
| 542 | url="http://example.com/follow.html", |
| 543 | text="Follow this link", |
| 544 | fragment="", |
| 545 | nofollow=False, |
| 546 | ), |
| 547 | Link( |
| 548 | url="http://example.com/nofollow.html", |
| 549 | text="Dont follow this one", |
| 550 | fragment="", |
| 551 | nofollow=True, |
| 552 | ), |
| 553 | Link( |
| 554 | url="http://example.com/nofollow2.html", |
| 555 | text="Choose to follow or not", |
| 556 | fragment="", |
| 557 | nofollow=False, |
| 558 | ), |
| 559 | Link( |
nothing calls this directly
no test coverage detected