MCPcopy
hub / github.com/scrapy/scrapy / _start_asyncio

Method _start_asyncio

scrapy/crawler.py:891–959  ·  view source on GitHub ↗
(
        self, stop_after_crawl: bool, install_signal_handlers: bool
    )

Source from the content-addressed store, hash-verified

889 self._start_twisted(stop_after_crawl, install_signal_handlers)
890
891 def _start_asyncio(
892 self, stop_after_crawl: bool, install_signal_handlers: bool
893 ) -> None:
894 # We cannot use asyncio.run() here, because we can't let it handle the
895 # loop lifetime: _crawl() needs a loop (which we create in __init__()),
896 # because crawl() returns a Task.
897 # So we reproduce a part of asyncio.runners.Runner that is useful to us.
898
899 # Normal workflow:
900 # 1. _start_asyncio() creates a task for self.join() and calls _run_loop()
901 # 2. _run_loop() calls loop.run_until_complete(main_task)
902 # 3. Crawling tasks start and finish
903 # 4. join() completes, loop.run_until_complete() and thus _run_loop() return
904 # 5. _start_asyncio() calls _close_loop()
905 # 6. _close_loop() does finalization and calls loop.close()
906
907 # Normal workflow with stop_after_crawl=False:
908 # 1. _start_asyncio() creates a simple future and calls _run_loop()
909 # 2. _run_loop() calls loop.run_until_complete(main_task)
910 # 3. Crawling tasks start and finish
911 # 4. _run_loop() blocks until the loop is stopped externally or the
912 # future is cancelled via Ctrl-C
913 # 5. (after _run_loop() returns) _start_asyncio() calls _close_loop()
914 # 6. _close_loop() does finalization and calls loop.close()
915
916 # Workflow with Ctrl-C pressed once:
917 # 1. While loop.run_until_complete() blocks, _signal_shutdown_reactorless()
918 # is called
919 # 2. _signal_shutdown_reactorless() calls _shutdown_graceful_reactorless()
920 # (via call_soon_threadsafe())
921 # 3. _shutdown_graceful_reactorless() calls stop()
922 # 4. For stop_after_crawl=True: crawl tasks finish, join() completes,
923 # loop.run_until_complete() and thus _run_loop() return
924 # For stop_after_crawl=False: _shutdown_graceful_reactorless() waits
925 # for crawl tasks via join(), then cancels the main task,
926 # loop.run_until_complete() raises CancelledError, _run_loop() returns
927 # 5. _start_asyncio() calls _close_loop()
928 # 6. _close_loop() does finalization and calls loop.close()
929
930 # Workflow with Ctrl-C pressed twice:
931 # 1. While loop.run_until_complete() blocks, _signal_shutdown_reactorless()
932 # is called
933 # 2. _signal_shutdown_reactorless() calls _shutdown_graceful_reactorless()
934 # (via call_soon_threadsafe()) and installs _signal_kill_reactorless()
935 # as the next handler
936 # 3. Before _shutdown_graceful_reactorless() completes,
937 # _signal_kill_reactorless() is called
938 # 4. _signal_kill_reactorless() cancels the main task
939 # (via call_soon_threadsafe())
940 # 5. loop.run_until_complete() raises CancelledError, _run_loop() returns
941 # 6. _start_asyncio() calls _close_loop()
942 # 7. _close_loop() cancels all pending tasks (including
943 # _shutdown_graceful_reactorless()), does finalization and calls loop.close()
944
945 loop = self._reactorless_loop
946 assert loop
947
948 if stop_after_crawl:

Callers 1

start · Method · 0.95

Calls 3

_run_loop · Method · 0.95
_close_loop · Method · 0.95
join · Method · 0.45

Tested by

no test coverage detected