MCPcopy
hub / github.com/scrapy/scrapy / crawl

Method crawl

scrapy/crawler.py:172–198  ·  view source on GitHub ↗

Start the crawler by instantiating its spider class with the given *args* and *kwargs* arguments, while setting the execution engine in motion. Should be called only once. Return a deferred that is fired when the crawl is finished.

(self, *args: Any, **kwargs: Any)

Source from the content-addressed store, hash-verified

170 # this method.
@inlineCallbacks
def crawl(self, *args: Any, **kwargs: Any) -> Generator[Deferred[Any], Any, None]:
    """Create the spider and set the execution engine in motion.

    The crawler's spider class is instantiated with the given *args* and
    *kwargs*, after which the engine is created, the spider is opened on
    it, and the engine is started. Should be called only once.

    Return a deferred that is fired when the crawl is finished.

    Raises ``RuntimeError`` if a crawl is already taking place, or if
    ``crawl()`` has already been run on this instance.
    """
    if self.crawling:
        raise RuntimeError("Crawling already taking place")
    if self._started:
        raise RuntimeError(
            "Cannot run Crawler.crawl() more than once on the same instance."
        )

    # Flag the instance as running and as used before doing any work.
    self.crawling = True
    self._started = True

    try:
        self.spider = self._create_spider(*args, **kwargs)
        self._apply_settings()
        self._update_root_log_handler()
        self.engine = self._create_engine()

        opening = deferred_from_coro(self.engine.open_spider_async())
        yield opening
        running = deferred_from_coro(self.engine.start_async())
        yield running
    except Exception:
        # Something in the try block failed: clear the running flag
        # (``_started`` stays set), close the engine if one exists, and
        # then let the original error propagate to the caller.
        self.crawling = False
        if self.engine is not None:
            closing = deferred_from_coro(self.engine.close_async())
            yield closing
        raise
199
200 async def crawl_async(self, *args: Any, **kwargs: Any) -> None:
201 """Start the crawler by instantiating its spider class with the given

Callers 3

_crawlMethod · 0.45
_scheduleMethod · 0.45

Calls 8

_create_spiderMethod · 0.95
_apply_settingsMethod · 0.95
_create_engineMethod · 0.95
deferred_from_coroFunction · 0.90
start_asyncMethod · 0.80
close_asyncMethod · 0.80
open_spider_asyncMethod · 0.45

Tested by 1