MCPcopy
hub / github.com/encode/httpx / urlparse

Function urlparse

httpx/_urlparse.py:213–345  ·  view source on GitHub ↗
(url: str = "", **kwargs: str | None)

Source from the content-addressed store, hash-verified

211
212
213def urlparse(url: str = "", **kwargs: str | None) -> ParseResult:
214 # Initial basic checks on allowable URLs.
215 # ---------------------------------------
216
217 # Hard limit the maximum allowable URL length.
218 if len(url) > MAX_URL_LENGTH:
219 raise InvalidURL("URL too long")
220
221 # If a URL includes any ASCII control characters including \t, \r, \n,
222 # then treat it as invalid.
223 if any(char.isascii() and not char.isprintable() for char in url):
224 char = next(char for char in url if char.isascii() and not char.isprintable())
225 idx = url.find(char)
226 error = (
227 f"Invalid non-printable ASCII character in URL, {char!r} at position {idx}."
228 )
229 raise InvalidURL(error)
230
231 # Some keyword arguments require special handling.
232 # ------------------------------------------------
233
234 # Coerce "port" to a string, if it is provided as an integer.
235 if "port" in kwargs:
236 port = kwargs["port"]
237 kwargs["port"] = str(port) if isinstance(port, int) else port
238
239 # Replace "netloc" with "host" and "port".
240 if "netloc" in kwargs:
241 netloc = kwargs.pop("netloc") or ""
242 kwargs["host"], _, kwargs["port"] = netloc.partition(":")
243
244 # Replace "username" and/or "password" with "userinfo".
245 if "username" in kwargs or "password" in kwargs:
246 username = quote(kwargs.pop("username", "") or "", safe=USERNAME_SAFE)
247 password = quote(kwargs.pop("password", "") or "", safe=PASSWORD_SAFE)
248 kwargs["userinfo"] = f"{username}:{password}" if password else username
249
250 # Replace "raw_path" with "path" and "query".
251 if "raw_path" in kwargs:
252 raw_path = kwargs.pop("raw_path") or ""
253 kwargs["path"], seperator, kwargs["query"] = raw_path.partition("?")
254 if not seperator:
255 kwargs["query"] = None
256
257 # Ensure that IPv6 "host" addresses are always escaped with "[...]".
258 if "host" in kwargs:
259 host = kwargs.get("host") or ""
260 if ":" in host and not (host.startswith("[") and host.endswith("]")):
261 kwargs["host"] = f"[{host}]"
262
263 # If any keyword arguments are provided, ensure they are valid.
264 # -------------------------------------------------------------
265
266 for key, value in kwargs.items():
267 if value is not None:
268 if len(value) > MAX_URL_LENGTH:
269 raise InvalidURL(f"URL component '{key}' too long")
270

Callers 3

__init__Method · 0.90
test_urlparseFunction · 0.90
copy_withMethod · 0.85

Calls 9

InvalidURLClass · 0.85
quoteFunction · 0.85
encode_hostFunction · 0.85
normalize_portFunction · 0.85
validate_pathFunction · 0.85
normalize_pathFunction · 0.85
ParseResultClass · 0.85
getMethod · 0.45
itemsMethod · 0.45

Tested by 1

test_urlparseFunction · 0.72