Download the page at `url` and parse it for links. Returned links have had the fragment after `#` removed and have been made absolute; e.g. the URL 'gen.html#tornado.gen.coroutine' becomes 'http://www.tornadoweb.org/en/stable/gen.html'.
(url)
| 14 | |
| 15 | |
async def get_links_from_url(url):
    """Fetch the page at `url` and return the links found in it.

    Every link reported by `get_links` first has its `#` fragment stripped
    by `remove_fragment` and is then resolved against `url` with `urljoin`,
    so e.g. the URL 'gen.html#tornado.gen.coroutine' becomes
    'http://www.tornadoweb.org/en/stable/gen.html'.

    Prints a "fetched <url>" line once the download has completed.
    """
    client = httpclient.AsyncHTTPClient()
    response = await client.fetch(url)
    print("fetched %s" % url)

    # Bytes that cannot be decoded are dropped instead of raising.
    page = response.body.decode(errors="ignore")

    links = []
    for href in get_links(page):
        links.append(urljoin(url, remove_fragment(href)))
    return links
| 28 | |
| 29 | |
| 30 | def remove_fragment(url): |
no test coverage detected