(self)
| 180 | self.assertEqual(sin.encode("iso-8859-15", "namereplace"), sout) |
| 181 | |
| 182 | def test_decoding_callbacks(self): |
| 183 | # This is a test for a decoding callback handler |
| 184 | # that allows the decoding of the invalid sequence |
| 185 | # "\xc0\x80" and returns "\x00" instead of raising an error. |
| 186 | # All other illegal sequences will be handled strictly. |
| 187 | def relaxedutf8(exc): |
| 188 | if not isinstance(exc, UnicodeDecodeError): |
| 189 | raise TypeError("don't know how to handle %r" % exc) |
| 190 | if exc.object[exc.start:exc.start+2] == b"\xc0\x80": |
| 191 | return ("\x00", exc.start+2) # retry after two bytes |
| 192 | else: |
| 193 | raise exc |
| 194 | |
| 195 | codecs.register_error("test.relaxedutf8", relaxedutf8) |
| 196 | |
| 197 | # all the "\xc0\x80" will be decoded to "\x00" |
| 198 | sin = b"a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80" |
| 199 | sout = "a\x00b\x00c\xfc\x00\x00" |
| 200 | self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout) |
| 201 | |
| 202 | # "\xc0\x81" is not valid and a UnicodeDecodeError will be raised |
| 203 | sin = b"\xc0\x80\xc0\x81" |
| 204 | self.assertRaises(UnicodeDecodeError, sin.decode, |
| 205 | "utf-8", "test.relaxedutf8") |
| 206 | |
| 207 | def test_charmapencode(self): |
| 208 | # For charmap encodings the replacement string will be |
nothing calls this directly
no test coverage detected