:rtype: str
(data: bytes)
| 1006 | |
| 1007 | |
| 1008 | def guess_json_utf(data: bytes) -> str | None: |
| 1009 | """ |
| 1010 | :rtype: str |
| 1011 | """ |
| 1012 | # JSON always starts with two ASCII characters, so detection is as |
| 1013 | # easy as counting the nulls and from their location and count |
| 1014 | # determine the encoding. Also detect a BOM, if present. |
| 1015 | sample = data[:4] |
| 1016 | if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE): |
| 1017 | return "utf-32" # BOM included |
| 1018 | if sample[:3] == codecs.BOM_UTF8: |
| 1019 | return "utf-8-sig" # BOM included, MS style (discouraged) |
| 1020 | if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE): |
| 1021 | return "utf-16" # BOM included |
| 1022 | nullcount = sample.count(_null) |
| 1023 | if nullcount == 0: |
| 1024 | return "utf-8" |
| 1025 | if nullcount == 2: |
| 1026 | if sample[::2] == _null2: # 1st and 3rd are null |
| 1027 | return "utf-16-be" |
| 1028 | if sample[1::2] == _null2: # 2nd and 4th are null |
| 1029 | return "utf-16-le" |
| 1030 | # Did not detect 2 valid UTF-16 ascii-range characters |
| 1031 | if nullcount == 3: |
| 1032 | if sample[:3] == _null3: |
| 1033 | return "utf-32-be" |
| 1034 | if sample[1:] == _null3: |
| 1035 | return "utf-32-le" |
| 1036 | # Did not detect a valid UTF-32 ascii-range character |
| 1037 | return None |
| 1038 | |
| 1039 | |
| 1040 | def prepend_scheme_if_needed(url: str, new_scheme: str) -> str: |
no outgoing calls