MCPcopy
hub / github.com/scrapy/scrapy / scraped_data

Method scraped_data

scrapy/commands/parse.py:276–303  ·  view source on GitHub ↗
(
        self,
        args: tuple[
            list[Any], list[Request], argparse.Namespace, int, Spider, CallbackT
        ],
    )

Source from the content-addressed store, hash-verified

274 logger.error("No response downloaded for: %(url)s", {"url": url})
275
def scraped_data(
    self,
    args: tuple[
        list[Any], list[Request], argparse.Namespace, int, Spider, CallbackT
    ],
) -> list[Any]:
    """Record one callback's output and build the list to hand back.

    ``args`` is a 6-tuple ``(items, requests, opts, depth, spider, callback)``.

    * When ``opts.pipelines`` is truthy, every item is passed to the item
      processor found at ``self.pcrawler.engine.scraper.itemproc``.
    * ``items`` and ``requests`` are always registered for ``depth`` through
      ``self.add_items`` / ``self.add_requests``.
    * When ``depth < opts.depth``, each request is re-targeted at
      ``callback``; its own callback is preserved in
      ``req.meta["_callback"]`` and its depth in ``req.meta["_depth"]``.

    Returns a new list: the items (only if ``opts.output`` is truthy)
    followed by the requests (only if ``depth < opts.depth``). The input
    ``items`` list is never modified.
    """
    items, requests, opts, depth, spider, callback = args

    if opts.pipelines:
        assert self.pcrawler.engine
        itemproc = self.pcrawler.engine.scraper.itemproc
        # Use the coroutine-based entry point when the processor offers one;
        # otherwise fall back to the synchronous call that takes the spider.
        if hasattr(itemproc, "process_item_async"):
            for item in items:
                _schedule_coro(itemproc.process_item_async(item))
        else:
            for item in items:
                itemproc.process_item(item, spider)

    self.add_items(depth, items)
    self.add_requests(depth, requests)

    # Copy instead of aliasing ``items``: extending the alias in place would
    # append Request objects to the caller's item list (the same object that
    # was just passed to ``add_items``).
    result: list[Any] = list(items) if opts.output else []

    if depth < opts.depth:
        for req in requests:
            req.meta["_depth"] = depth + 1
            # Stash the request's own callback before overriding it.
            req.meta["_callback"] = req.callback
            req.callback = callback
        result.extend(requests)

    return result
304
305 def _get_callback(
306 self,

Callers

nothing calls this directly

Calls 5

add_itemsMethod · 0.95
add_requestsMethod · 0.95
_schedule_coroFunction · 0.90
process_item_asyncMethod · 0.45
process_itemMethod · 0.45

Tested by

no test coverage detected