| 47 | |
| 48 | |
| 49 | class DownloadHandlers: |
def __init__(self, crawler: Crawler):
    """Set up per-scheme bookkeeping and register the configured handlers.

    The ``DOWNLOAD_HANDLERS`` setting is read through
    ``crawler.settings.getwithbase`` and filtered with
    ``without_none_values``. Every remaining scheme is recorded in
    ``self._schemes`` and passed to ``self._load_handler`` with
    ``skip_lazy=True``. Finally ``self._close`` is connected to the
    ``engine_stopped`` signal.
    """
    self._crawler: Crawler = crawler
    # scheme -> configured handler path (or callable), filled in below
    self._schemes: dict[str, str | Callable[..., Any]] = {}
    # scheme -> handler instance, once one has been built
    self._handlers: dict[str, DownloadHandlerProtocol] = {}
    # scheme -> reason the handler could not be used
    self._notconfigured: dict[str, str] = {}
    # schemes whose handler has a Deferred-based download_request()
    self._old_style_handlers: set[str] = set()

    raw_setting = crawler.settings.getwithbase("DOWNLOAD_HANDLERS")
    configured: dict[str, str | Callable[..., Any]] = without_none_values(
        cast("dict[str, str | Callable[..., Any]]", raw_setting)
    )
    for scheme_name, handler_path in configured.items():
        # Record the scheme first: _load_handler looks the path up by scheme.
        self._schemes[scheme_name] = handler_path
        self._load_handler(scheme_name, skip_lazy=True)

    crawler.signals.connect(self._close, signals.engine_stopped)
| 71 | |
| 72 | def _get_handler(self, scheme: str) -> DownloadHandlerProtocol | None: |
| 73 | """Lazy-load the downloadhandler for a scheme |
| 74 | only on the first request for that scheme. |
| 75 | """ |
| 76 | if scheme in self._handlers: |
| 77 | return self._handlers[scheme] |
| 78 | if scheme in self._notconfigured: |
| 79 | return None |
| 80 | if scheme not in self._schemes: |
| 81 | self._notconfigured[scheme] = "no handler available for that scheme" |
| 82 | return None |
| 83 | |
| 84 | return self._load_handler(scheme) |
| 85 | |
| 86 | def _load_handler( |
| 87 | self, scheme: str, skip_lazy: bool = False |
| 88 | ) -> DownloadHandlerProtocol | None: |
| 89 | path = self._schemes[scheme] |
| 90 | try: |
| 91 | dhcls: type[DownloadHandlerProtocol] = load_object(path) |
| 92 | if skip_lazy: |
| 93 | if not hasattr(dhcls, "lazy"): |
| 94 | warnings.warn( |
| 95 | f"{global_object_name(dhcls)} doesn't define a 'lazy' attribute." |
| 96 | f" This is deprecated, please add 'lazy = True' (which is the current" |
| 97 | f" default value) to the class definition.", |
| 98 | category=ScrapyDeprecationWarning, |
| 99 | stacklevel=1, |
| 100 | ) |
| 101 | if getattr(dhcls, "lazy", True): |
| 102 | return None |
| 103 | dh = build_from_crawler( |
| 104 | dhcls, |
| 105 | self._crawler, |
| 106 | ) |
no outgoing calls