Run a crawler with the provided arguments. It will call the given Crawler's :meth:`~Crawler.crawl` method, while keeping track of it so it can be stopped later. If ``crawler_or_spidercls`` isn't a :class:`~scrapy.crawler.Crawler` instance, this method will try to create one using this parameter as the spider class given to it.
(
self,
crawler_or_spidercls: type[Spider] | str | Crawler,
*args: Any,
**kwargs: Any,
)
| 519 | self._active: set[asyncio.Task[None]] = set() |
| 520 | |
def crawl(
    self,
    crawler_or_spidercls: type[Spider] | str | Crawler,
    *args: Any,
    **kwargs: Any,
) -> asyncio.Task[None]:
    """
    Start a crawl and return the task that tracks it.

    The Crawler's :meth:`~Crawler.crawl` method is invoked with the given
    arguments, and the crawl is tracked so that it can be stopped later.

    When ``crawler_or_spidercls`` is not already a
    :class:`~scrapy.crawler.Crawler` instance, a crawler is created from
    it, treating the value as the spider class to use.

    Returns a :class:`~asyncio.Task` object which completes when the
    crawling is finished.

    :param crawler_or_spidercls: already created crawler, or a spider class
        or spider's name inside the project to create it
    :type crawler_or_spidercls: :class:`~scrapy.crawler.Crawler` instance,
        :class:`~scrapy.spiders.Spider` subclass or string

    :param args: arguments to initialize the spider

    :param kwargs: keyword arguments to initialize the spider

    :raises ValueError: if a spider *instance* is passed instead of a
        spider class, a spider name or a Crawler object
    :raises RuntimeError: if the installed Twisted reactor (or the lack of
        one) does not agree with the ``TWISTED_REACTOR_ENABLED`` setting
    """
    # A spider object is rejected up front: only a class, a name or a
    # ready-made Crawler can be turned into a crawl.
    if isinstance(crawler_or_spidercls, Spider):
        raise ValueError(
            "The crawler_or_spidercls argument cannot be a spider object, "
            "it must be a spider class (or a Crawler object)"
        )

    # The setting and the actual reactor state must agree before starting.
    reactor_enabled = self.settings.getbool("TWISTED_REACTOR_ENABLED")
    if not reactor_enabled:
        if is_reactor_installed():
            raise RuntimeError(
                "TWISTED_REACTOR_ENABLED is False but a Twisted reactor is installed."
            )
    elif not is_reactor_installed():
        raise RuntimeError(
            "We expected a Twisted reactor to be installed but it isn't."
        )
    elif not is_asyncio_reactor_installed():
        raise RuntimeError(
            f"When TWISTED_REACTOR_ENABLED is True, {type(self).__name__} "
            f"requires that the installed Twisted reactor is "
            f'"twisted.internet.asyncioreactor.AsyncioSelectorReactor".'
        )

    return self._crawl(
        self.create_crawler(crawler_or_spidercls), *args, **kwargs
    )
| 571 | |
| 572 | async def _crawl_and_track( |
| 573 | self, crawler: Crawler, *args: Any, **kwargs: Any |
no test coverage detected