| 211 | |
| 212 | |
| 213 | def urlparse(url: str = "", **kwargs: str | None) -> ParseResult: |
| 214 | # Initial basic checks on allowable URLs. |
| 215 | # --------------------------------------- |
| 216 | |
| 217 | # Hard limit the maximum allowable URL length. |
| 218 | if len(url) > MAX_URL_LENGTH: |
| 219 | raise InvalidURL("URL too long") |
| 220 | |
| 221 | # If a URL includes any ASCII control characters including \t, \r, \n, |
| 222 | # then treat it as invalid. |
| 223 | if any(char.isascii() and not char.isprintable() for char in url): |
| 224 | char = next(char for char in url if char.isascii() and not char.isprintable()) |
| 225 | idx = url.find(char) |
| 226 | error = ( |
| 227 | f"Invalid non-printable ASCII character in URL, {char!r} at position {idx}." |
| 228 | ) |
| 229 | raise InvalidURL(error) |
| 230 | |
| 231 | # Some keyword arguments require special handling. |
| 232 | # ------------------------------------------------ |
| 233 | |
| 234 | # Coerce "port" to a string, if it is provided as an integer. |
| 235 | if "port" in kwargs: |
| 236 | port = kwargs["port"] |
| 237 | kwargs["port"] = str(port) if isinstance(port, int) else port |
| 238 | |
| 239 | # Replace "netloc" with "host" and "port". |
| 240 | if "netloc" in kwargs: |
| 241 | netloc = kwargs.pop("netloc") or "" |
| 242 | kwargs["host"], _, kwargs["port"] = netloc.partition(":") |
| 243 | |
| 244 | # Replace "username" and/or "password" with "userinfo". |
| 245 | if "username" in kwargs or "password" in kwargs: |
| 246 | username = quote(kwargs.pop("username", "") or "", safe=USERNAME_SAFE) |
| 247 | password = quote(kwargs.pop("password", "") or "", safe=PASSWORD_SAFE) |
| 248 | kwargs["userinfo"] = f"{username}:{password}" if password else username |
| 249 | |
| 250 | # Replace "raw_path" with "path" and "query". |
| 251 | if "raw_path" in kwargs: |
| 252 | raw_path = kwargs.pop("raw_path") or "" |
| 253 | kwargs["path"], seperator, kwargs["query"] = raw_path.partition("?") |
| 254 | if not seperator: |
| 255 | kwargs["query"] = None |
| 256 | |
| 257 | # Ensure that IPv6 "host" addresses are always escaped with "[...]". |
| 258 | if "host" in kwargs: |
| 259 | host = kwargs.get("host") or "" |
| 260 | if ":" in host and not (host.startswith("[") and host.endswith("]")): |
| 261 | kwargs["host"] = f"[{host}]" |
| 262 | |
| 263 | # If any keyword arguments are provided, ensure they are valid. |
| 264 | # ------------------------------------------------------------- |
| 265 | |
| 266 | for key, value in kwargs.items(): |
| 267 | if value is not None: |
| 268 | if len(value) > MAX_URL_LENGTH: |
| 269 | raise InvalidURL(f"URL component '{key}' too long") |
| 270 | |