Check if a URL can be fetched according to the domain's robots.txt. :param url: The full URL to check :param sid: Session ID for fetching robots.txt if not yet cached
(self, url: str, sid: str)
| 41 | return parser |
| 42 | |
async def can_fetch(self, url: str, sid: str) -> bool:
    """Report whether robots.txt for the URL's domain permits fetching it.

    The rules object is obtained from ``self._get_parser(url, sid)`` and is
    then queried with ``"*"`` as the second argument (presumably the
    wildcard user agent -- confirm against the parser implementation).

    :param url: The full URL to check
    :param sid: Session ID used to fetch robots.txt when it is not cached yet
    :return: The result of the parser's ``can_fetch(url, "*")`` call
    """
    robots_rules = await self._get_parser(url, sid)
    allowed = robots_rules.can_fetch(url, "*")
    return allowed
| 51 | |
| 52 | async def get_delay_directives(self, url: str, sid: str) -> tuple[Optional[float], Optional[tuple[int, int]]]: |
| 53 | """Return both crawl-delay and request-rate in a single parser lookup. |