Convert a Uniform Resource Identifier (URI) into an Internationalized Resource Identifier (IRI). This is the algorithm from RFC 3987 Section 3.2, excluding step 4. Take a URI in ASCII bytes (e.g. '/I%20%E2%99%A5%20Django/') and return a string containing the encoded result (e.g. '/I%20♥%20Django/').
(uri)
| 153 | |
| 154 | |
def uri_to_iri(uri):
    """
    Convert a Uniform Resource Identifier (URI) into an Internationalized
    Resource Identifier (IRI).

    This is the algorithm from RFC 3987 Section 3.2, excluding step 4.

    Take a URI in ASCII bytes (e.g. '/I%20%E2%99%A5%20Django/') and return
    a string containing the encoded result (e.g. '/I%20♥%20Django/').

    If ``uri`` is None, return it unchanged.
    """
    if uri is None:
        return uri
    uri = force_bytes(uri)
    # Fast selective unquote: split on '%', so every chunk after the first
    # one followed a '%' in the input. If the first 2 bytes of such a chunk
    # are a hex code listed in _hextobyte, replace them with the mapped
    # bytes; the remainder of the chunk contains no '%' and is copied as is.
    # Chunks whose prefix is not in _hextobyte get their '%' put back, so
    # that escape stays encoded (like '%20' in the docstring example).
    chunks = uri.split(b"%")
    if len(chunks) == 1:
        # No '%' in the input: there is nothing to unquote.
        iri = uri
    else:
        parts = [chunks[0]]
        # Bound to locals once so the loop avoids repeated attribute and
        # global lookups.
        append = parts.append
        hextobyte = _hextobyte
        for chunk in chunks[1:]:
            # Slice once and reuse it for both the membership test and the
            # lookup; named `code` so the builtin hex() is not shadowed.
            code = chunk[:2]
            if code in hextobyte:
                append(hextobyte[code])
                append(chunk[2:])
            else:
                append(b"%")
                append(chunk)
        iri = b"".join(parts)
    # NOTE(review): repercent_broken_unicode is defined elsewhere; presumably
    # it re-escapes byte sequences that are not valid UTF-8 so that decode()
    # cannot fail -- confirm against its definition.
    return repercent_broken_unicode(iri).decode()
| 189 | |
| 190 | |
| 191 | def escape_uri_path(path): |