MCPcopy
hub / github.com/scrapy/scrapy / follow_all

Method follow_all

scrapy/http/response/text.py:221–292  ·  view source on GitHub ↗

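A generator that produces :class:`~.Request` instances to follow all links in ``urls``. It accepts the same arguments as the :class:`~.Request`'s ``__init__()`` method, except that each ``urls`` element does not need to be an absolute URL; it can be any of the following: a relative URL, a :class:`~scrapy.link.Link` object, a :class:`~scrapy.Selector` object for a ``<link>`` or ``<a>`` element, or an attribute :class:`~scrapy.Selector` (not SelectorList).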

(
        self,
        urls: Iterable[str | Link] | parsel.SelectorList[Any] | None = None,
        callback: CallbackT | None = None,
        method: str = "GET",
        headers: Mapping[AnyStr, Any] | Iterable[tuple[AnyStr, Any]] | None = None,
        body: bytes | str | None = None,
        cookies: CookiesT | None = None,
        meta: dict[str, Any] | None = None,
        encoding: str | None = None,
        priority: int = 0,
        dont_filter: bool = False,
        errback: Callable[[Failure], Any] | None = None,
        cb_kwargs: dict[str, Any] | None = None,
        flags: list[str] | None = None,
        css: str | None = None,
        xpath: str | None = None,
    )

Source from the content-addressed store, hash-verified

219 )
220
221 def follow_all(
222 self,
223 urls: Iterable[str | Link] | parsel.SelectorList[Any] | None = None,
224 callback: CallbackT | None = None,
225 method: str = "GET",
226 headers: Mapping[AnyStr, Any] | Iterable[tuple[AnyStr, Any]] | None = None,
227 body: bytes | str | None = None,
228 cookies: CookiesT | None = None,
229 meta: dict[str, Any] | None = None,
230 encoding: str | None = None,
231 priority: int = 0,
232 dont_filter: bool = False,
233 errback: Callable[[Failure], Any] | None = None,
234 cb_kwargs: dict[str, Any] | None = None,
235 flags: list[str] | None = None,
236 css: str | None = None,
237 xpath: str | None = None,
238 ) -> Iterable[Request]:
239 """
240 A generator that produces :class:`~.Request` instances to follow all
241 links in ``urls``. It accepts the same arguments as the :class:`~.Request`'s
242 ``__init__()`` method, except that each ``urls`` element does not need to be
243 an absolute URL, it can be any of the following:
244
245 * a relative URL
246 * a :class:`~scrapy.link.Link` object, e.g. the result of
247 :ref:`topics-link-extractors`
248 * a :class:`~scrapy.Selector` object for a ``<link>`` or ``<a>`` element, e.g.
249 ``response.css('a.my_link')[0]``
250 * an attribute :class:`~scrapy.Selector` (not SelectorList), e.g.
251 ``response.css('a::attr(href)')[0]`` or
252 ``response.xpath('//img/@src')[0]``
253
254 In addition, ``css`` and ``xpath`` arguments are accepted to perform the link extraction
255 within the ``follow_all()`` method (only one of ``urls``, ``css`` and ``xpath`` is accepted).
256
257 Note that when passing a ``SelectorList`` as argument for the ``urls`` parameter or
258 using the ``css`` or ``xpath`` parameters, this method will not produce requests for
259 selectors from which links cannot be obtained (for instance, anchor tags without an
260 ``href`` attribute)
261 """
262 arguments = [x for x in (urls, css, xpath) if x is not None]
263 if len(arguments) != 1:
264 raise ValueError(
265 "Please supply exactly one of the following arguments: urls, css, xpath"
266 )
267 if not urls:
268 if css:
269 urls = self.css(css)
270 if xpath:
271 urls = self.xpath(xpath)
272 if isinstance(urls, parsel.SelectorList):
273 selectors = urls
274 urls = []
275 for sel in selectors:
276 with suppress(_InvalidSelector):
277 urls.append(_url_from_selector(sel))
278 return super().follow_all(

Calls 3

cssMethod · 0.95
xpathMethod · 0.95
_url_from_selectorFunction · 0.85