(
self, stop_after_crawl: bool, install_signal_handlers: bool
)
| 889 | self._start_twisted(stop_after_crawl, install_signal_handlers) |
| 890 | |
| 891 | def _start_asyncio( |
| 892 | self, stop_after_crawl: bool, install_signal_handlers: bool |
| 893 | ) -> None: |
| 894 | # We cannot use asyncio.run() here, because it would manage the loop |
| 895 | # lifetime itself: _crawl() needs the loop that we create in __init__(), |
| 896 | # since crawl() returns a Task. |
| 897 | # So we reproduce a part of asyncio.runners.Runner that is useful to us. |
| 898 | |
| 899 | # Normal workflow: |
| 900 | # 1. _start_asyncio() creates a task for self.join() and calls _run_loop() |
| 901 | # 2. _run_loop() calls loop.run_until_complete(main_task) |
| 902 | # 3. Crawling tasks start and finish |
| 903 | # 4. join() completes, loop.run_until_complete() and thus _run_loop() return |
| 904 | # 5. _start_asyncio() calls _close_loop() |
| 905 | # 6. _close_loop() does finalization and calls loop.close() |
| 906 | |
| 907 | # Normal workflow with stop_after_crawl=False: |
| 908 | # 1. _start_asyncio() creates a simple future and calls _run_loop() |
| 909 | # 2. _run_loop() calls loop.run_until_complete(main_task) |
| 910 | # 3. Crawling tasks start and finish |
| 911 | # 4. _run_loop() blocks until the loop is stopped externally or the |
| 912 | # future is cancelled via Ctrl-C |
| 913 | # 5. (after _run_loop() returns) _start_asyncio() calls _close_loop() |
| 914 | # 6. _close_loop() does finalization and calls loop.close() |
| 915 | |
| 916 | # Workflow with Ctrl-C pressed once: |
| 917 | # 1. While loop.run_until_complete() blocks, _signal_shutdown_reactorless() |
| 918 | # is called |
| 919 | # 2. _signal_shutdown_reactorless() calls _shutdown_graceful_reactorless() |
| 920 | # (via call_soon_threadsafe()) |
| 921 | # 3. _shutdown_graceful_reactorless() calls stop() |
| 922 | # 4. For stop_after_crawl=True: crawl tasks finish, join() completes, |
| 923 | # loop.run_until_complete() and thus _run_loop() return |
| 924 | # For stop_after_crawl=False: _shutdown_graceful_reactorless() waits |
| 925 | # for crawl tasks via join(), then cancels the main task, |
| 926 | # loop.run_until_complete() raises CancelledError, _run_loop() returns |
| 927 | # 5. _start_asyncio() calls _close_loop() |
| 928 | # 6. _close_loop() does finalization and calls loop.close() |
| 929 | |
| 930 | # Workflow with Ctrl-C pressed twice: |
| 931 | # 1. While loop.run_until_complete() blocks, _signal_shutdown_reactorless() |
| 932 | # is called |
| 933 | # 2. _signal_shutdown_reactorless() calls _shutdown_graceful_reactorless() |
| 934 | # (via call_soon_threadsafe()) and installs _signal_kill_reactorless() |
| 935 | # as the next handler |
| 936 | # 3. Before _shutdown_graceful_reactorless() completes, |
| 937 | # _signal_kill_reactorless() is called |
| 938 | # 4. _signal_kill_reactorless() cancels the main task |
| 939 | # (via call_soon_threadsafe()) |
| 940 | # 5. loop.run_until_complete() raises CancelledError, _run_loop() returns |
| 941 | # 6. _start_asyncio() calls _close_loop() |
| 942 | # 7. _close_loop() cancels all pending tasks (including |
| 943 | # _shutdown_graceful_reactorless()), does finalization and calls loop.close() |
| 944 | |
| 945 | loop = self._reactorless_loop |
| 946 | assert loop |
| 947 | |
| 948 | if stop_after_crawl: |
no test coverage detected