MCPcopy
hub / github.com/scrapy/scrapy / ExecutionEngine

Class ExecutionEngine

scrapy/core/engine.py:102–686  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

100
101
102class ExecutionEngine:
103 _SLOT_HEARTBEAT_INTERVAL: float = 5.0
104
def __init__(
    self,
    crawler: Crawler,
    spider_closed_callback: Callable[
        [Spider], Coroutine[Any, Any, None] | Deferred[None] | None
    ],
) -> None:
    """Bind the engine to *crawler* and build its downloader and scraper.

    ``spider_closed_callback`` is only stored (as
    ``_spider_closed_callback``); it is not invoked here.

    The scheduler class is resolved through ``_get_scheduler_class`` and
    the downloader class is loaded from the ``DOWNLOADER`` setting. If
    resolving the scheduler class, creating the downloader, inspecting its
    ``fetch()`` signature or creating the scraper raises, a downloader
    that was already attached to the engine is closed and the exception is
    re-raised unchanged.
    """
    # References taken straight from the crawler.
    self.crawler: Crawler = crawler
    self.settings: Settings = crawler.settings
    self.signals: SignalManager = crawler.signals
    assert crawler.logformatter
    self.logformatter: LogFormatter = crawler.logformatter

    # Nothing is open yet: no slot and no spider.
    self._slot: _Slot | None = None
    self.spider: Spider | None = None

    # Every lifecycle flag starts out cleared.
    self.running = self._starting = self._stopping = self.paused = False

    self._spider_closed_callback: Callable[
        [Spider], Coroutine[Any, Any, None] | Deferred[None] | None
    ] = spider_closed_callback

    # Run-time state, left empty by the constructor.
    self.start_time: float | None = None
    self._start: AsyncIterator[Any] | None = None
    self._closewait: Deferred[None] | None = None
    self._start_request_processing_awaitable: (
        asyncio.Future[None] | Deferred[None] | None
    ) = None

    # Loaded outside the ``try``: at this point there is nothing to clean up.
    downloader_type: type[Downloader] = load_object(self.settings["DOWNLOADER"])
    try:
        self.scheduler_cls: type[BaseScheduler] = self._get_scheduler_class(
            crawler.settings
        )

        downloader = downloader_type(crawler)
        self.downloader: Downloader = downloader

        needs_spider = argument_is_required(downloader.fetch, "spider")
        self._downloader_fetch_needs_spider: bool = needs_spider
        if needs_spider:
            message = (
                f"The fetch() method of {global_object_name(downloader_type)} requires a spider argument,"
                f" this is deprecated and the argument will not be passed in future Scrapy versions."
            )
            # warn() is called directly from __init__ (not from a helper) so
            # that stacklevel=2 keeps referring to the same frame.
            warnings.warn(message, ScrapyDeprecationWarning, stacklevel=2)

        self.scraper: Scraper = Scraper(crawler)
    except Exception:
        # The failure may have happened before the downloader was attached,
        # so only close it when the attribute actually exists.
        if hasattr(self, "downloader"):
            self.downloader.close()
        raise
154
155 def _get_scheduler_class(self, settings: BaseSettings) -> type[BaseScheduler]:
156 scheduler_cls: type[BaseScheduler] = load_object(settings["SCHEDULER"])
157 if not issubclass(scheduler_cls, BaseScheduler):
158 raise TypeError(
159 f"The provided scheduler class ({settings['SCHEDULER']})"

Calls

no outgoing calls