Yield the initial :class:`~scrapy.Request` objects to send.

.. versionadded:: 2.13

For example:

.. code-block:: python

    from scrapy import Request, Spider


    class MySpider(Spider):
        name = "myspider"

        async def start(self):
            yield Request("https://toscrape.com/")
| 80 | crawler.signals.connect(self.close, signals.spider_closed) |
| 81 | |
async def start(self) -> AsyncIterator[Any]:
    """Yield the initial :class:`~scrapy.Request` objects to send.

    .. versionadded:: 2.13

    For example:

    .. code-block:: python

        from scrapy import Request, Spider


        class MySpider(Spider):
            name = "myspider"

            async def start(self):
                yield Request("https://toscrape.com/")

    The default implementation iterates over :attr:`start_urls` and yields
    one request per URL, each with :attr:`~scrapy.Request.dont_filter`
    enabled. It is functionally equivalent to:

    .. code-block:: python

        async def start(self):
            for url in self.start_urls:
                yield Request(url, dont_filter=True)

    Yielding :ref:`items <topics-items>` is also supported. For example:

    .. code-block:: python

        async def start(self):
            yield {"foo": "bar"}

    To write spiders that also work on Scrapy versions lower than 2.13,
    additionally define a synchronous ``start_requests()`` method that
    returns an iterable. For example:

    .. code-block:: python

        def start_requests(self):
            yield Request("https://toscrape.com/")

    :raises AttributeError: if :attr:`start_urls` is empty while the spider
        has a ``start_url`` attribute, which usually means the attribute
        name is missing its trailing ``s``.

    .. seealso:: :ref:`start-requests`
    """
    if not self.start_urls:
        # Nothing to crawl: check for the common singular-name typo so the
        # user gets an explicit error instead of a crawl that does nothing.
        if hasattr(self, "start_url"):
            typo_hint = (
                "Crawling could not start: 'start_urls' not found or empty "
                "(but found 'start_url' attribute instead, did you miss an 's'?)"
            )
            raise AttributeError(typo_hint)
    for seed_url in self.start_urls:
        # dont_filter=True is set on every request built from start_urls.
        yield Request(seed_url, dont_filter=True)
| 136 | |
| 137 | def _parse(self, response: Response, **kwargs: Any) -> Any: |
| 138 | return self.parse(response, **kwargs) |