Start the crawler by instantiating its spider class with the given *args* and *kwargs* arguments, while setting the execution engine in motion. Should be called only once. .. versionadded:: 2.14 Completes when the crawl is finished.
(self, *args: Any, **kwargs: Any)
| 198 | raise |
| 199 | |
async def crawl_async(self, *args: Any, **kwargs: Any) -> None:
    """Create the spider from *args* / *kwargs* and run the crawl.

    The spider is built with ``self._create_spider``, settings and the root
    log handler are refreshed, and a new engine is created, opened and
    started. This coroutine awaits the engine start before returning.

    Should be called only once per instance.

    .. versionadded:: 2.14

    Raises:
        RuntimeError: if ``self.crawling`` is already set, or if this
            instance has been started before.
        Exception: any error raised during set-up or by the engine is
            re-raised after ``self.crawling`` has been cleared and the
            engine (when one was created) has been closed.
    """
    if self.crawling:
        raise RuntimeError("Crawling already taking place")
    if self._started:
        raise RuntimeError(
            "Cannot run Crawler.crawl_async() more than once on the same instance."
        )

    # Both flags are raised before any work is done; ``_started`` is never
    # reset here, which is what makes a second call fail.
    self.crawling = True
    self._started = True

    try:
        self.spider = self._create_spider(*args, **kwargs)
        self._apply_settings()
        self._update_root_log_handler()
        self.engine = self._create_engine()
        await self.engine.open_spider_async()
        await self.engine.start_async()
    except Exception:
        self.crawling = False
        # The failure may have happened before the engine was created, so
        # only close it when there is one.
        pending_engine = self.engine
        if pending_engine is not None:
            await pending_engine.close_async()
        raise
| 229 | |
def _create_spider(self, *args: Any, **kwargs: Any) -> Spider:
    """Return a spider built by ``self.spidercls.from_crawler``.

    This crawler is passed as the first argument, followed by *args* and
    *kwargs* unchanged.
    """
    build_spider = self.spidercls.from_crawler
    return build_spider(self, *args, **kwargs)