MCPcopy
hub / github.com/scrapy/scrapy / decode_robotstxt

Function decode_robotstxt

scrapy/robotstxt.py:24–42  ·  view source on GitHub ↗
(
    robotstxt_body: bytes, spider: Spider | None, to_native_str_type: bool = False
)

Source from the content-addressed store, hash-verified

22
23
def decode_robotstxt(
    robotstxt_body: bytes, spider: Spider | None, to_native_str_type: bool = False
) -> str:
    """Turn the raw bytes of a robots.txt response into text.

    With ``to_native_str_type`` set, the conversion is delegated to
    ``to_unicode``. Otherwise the body is decoded with the ``utf-8-sig``
    codec and ``errors="ignore"``, so a leading BOM is stripped and bytes
    that are not valid UTF-8 are dropped.

    If decoding raises ``UnicodeDecodeError``, a warning is logged (with
    ``spider`` passed through the logging ``extra`` mapping) and an empty
    string is returned instead of propagating the error.
    """
    try:
        if not to_native_str_type:
            return robotstxt_body.decode("utf-8-sig", errors="ignore")
        return to_unicode(robotstxt_body)
    except UnicodeDecodeError:
        # Garbage, or a robots.txt in an encoding other than UTF-8: discard
        # the content and fall back to an empty body ('allow all' state).
        logger.warning(
            "Failure while parsing robots.txt. File either contains garbage or "
            "is in an encoding other than UTF-8, treating it as an empty file.",
            exc_info=sys.exc_info(),
            extra={"spider": spider},
        )
        return ""
43
44
45class RobotParser(metaclass=ABCMeta):

Callers 7

test_decode_utf8 · Method · 0.90
test_decode_non_utf8 · Method · 0.90
test_decode_utf8_bom · Method · 0.90
__init__ · Method · 0.85
__init__ · Method · 0.85
__init__ · Method · 0.85

Calls 1

to_unicode · Function · 0.90

Tested by 4

test_decode_utf8 · Method · 0.72
test_decode_non_utf8 · Method · 0.72
test_decode_utf8_bom · Method · 0.72