The detect_encoding() function is used to detect the encoding that should be used to decode a Python source file. It requires one argument, readline, in the same way as the tokenize() generator. It will call readline a maximum of twice, and return the encoding used (as a string) and a list of any lines (left as bytes) it has read in.
(readline)
| 356 | return orig_enc |
| 357 | |
| 358 | def detect_encoding(readline): |
| 359 | """ |
| 360 | The detect_encoding() function is used to detect the encoding that should |
| 361 | be used to decode a Python source file. It requires one argument, readline, |
| 362 | in the same way as the tokenize() generator. |
| 363 | |
| 364 | It will call readline a maximum of twice, and return the encoding used |
| 365 | (as a string) and a list of any lines (left as bytes) it has read in. |
| 366 | |
| 367 | It detects the encoding from the presence of a utf-8 bom or an encoding |
| 368 | cookie as specified in pep-0263. If both a bom and a cookie are present, |
| 369 | but disagree, a SyntaxError will be raised. If the encoding cookie is an |
| 370 | invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found, |
| 371 | 'utf-8-sig' is returned. |
| 372 | |
| 373 | If no encoding is specified, then the default of 'utf-8' will be returned. |
| 374 | """ |
| 375 | try: |
| 376 | filename = readline.__self__.name |
| 377 | except AttributeError: |
| 378 | filename = None |
| 379 | bom_found = False |
| 380 | encoding = None |
| 381 | default = 'utf-8' |
| 382 | def read_or_stop(): |
| 383 | try: |
| 384 | return readline() |
| 385 | except StopIteration: |
| 386 | return b'' |
| 387 | |
| 388 | def check(line, encoding): |
| 389 | # Check if the line matches the encoding. |
| 390 | if 0 in line: |
| 391 | raise SyntaxError("source code cannot contain null bytes") |
| 392 | try: |
| 393 | line.decode(encoding) |
| 394 | except UnicodeDecodeError: |
| 395 | msg = "invalid or missing encoding declaration" |
| 396 | if filename is not None: |
| 397 | msg = '{} for {!r}'.format(msg, filename) |
| 398 | raise SyntaxError(msg) |
| 399 | |
| 400 | def find_cookie(line): |
| 401 | match = cookie_re.match(line) |
| 402 | if not match: |
| 403 | return None |
| 404 | encoding = _get_normal_name(match.group(1).decode()) |
| 405 | try: |
| 406 | lookup(encoding) |
| 407 | except LookupError: |
| 408 | # This behaviour mimics the Python interpreter |
| 409 | if filename is None: |
| 410 | msg = "unknown encoding: " + encoding |
| 411 | else: |
| 412 | msg = "unknown encoding for {!r}: {}".format(filename, |
| 413 | encoding) |
| 414 | raise SyntaxError(msg) |
| 415 |
no test coverage detected
searching dependent graphs…