`urllib.parse`: URL Parsing

Recipes

from urllib.parse import urlparse, urlsplit, urldefrag


url = 'scheme://netloc:80/path;parameters?query=value#fragment'


>>> r = urlparse(url)
>>> r
ParseResult(scheme='scheme', netloc='netloc:80', path='/path;parameters', params='', query='query=value', fragment='fragment')
>>> assert r.scheme == 'scheme'
>>> assert r.netloc == 'netloc:80'
>>> assert r.hostname == 'netloc'
>>> assert r.port == 80
>>> assert r.path == '/path;parameters'
>>> assert r.params == ''  # empty here: urlparse() only splits params for schemes that use them (e.g. http, ftp)
>>> assert r.query == 'query=value'
>>> assert r.fragment == 'fragment'
>>> assert r.geturl() == url


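# `urlsplit()` is similar to `urlparse()`, but does not split the params from the URL.
# It should generally be used instead of `urlparse()` if the more recent URL syntax
# allowing parameters to be applied to each segment of the path portion of the URL (see RFC 2396) is wanted.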
>>> r = urlsplit(url)
>>> r
SplitResult(scheme='scheme', netloc='netloc:80', path='/path;parameters', query='query=value', fragment='fragment')
>>> assert r.scheme == 'scheme'
>>> assert r.netloc == 'netloc:80'
>>> assert r.hostname == 'netloc'
>>> assert r.port == 80
>>> assert r.path == '/path;parameters'
>>> assert r.query == 'query=value'
>>> assert r.fragment == 'fragment'
>>> assert r.geturl() == url


>>> r = urldefrag(url)
>>> r
DefragResult(url='scheme://netloc:80/path;parameters?query=value', fragment='fragment')
>>> assert r.url == 'scheme://netloc:80/path;parameters?query=value'
>>> assert r.fragment == 'fragment'

Join (Concatenate) URL

from urllib.parse import urljoin


>>> url0 = 'https://www.com/path/x.html'
>>> urljoin(url0, 'y.html')
'https://www.com/path/y.html'


>>> url1 = 'https://www.com/path/'
>>> urljoin(url1, '/subpath/y.html')
'https://www.com/subpath/y.html'
>>> urljoin(url1, 'subpath/y.html')
'https://www.com/path/subpath/y.html'

Encode Query String

from urllib.parse import urlencode


q1 = {
    'q': 'query string',
    'page': 1,
}
q2 = {
    'q': ['q1', 'q2'],
    'page': 1,
}

>>> urlencode(q1)
'q=query+string&page=1'
>>> urlencode(q2, doseq=True)
'q=q1&q=q2&page=1'

Decode Query String

from urllib.parse import parse_qs


>>> parse_qs('q=query+string&page=1')
{'q': ['query string'], 'page': ['1']}
>>> parse_qs('q=q1&q=q2&page=1')
{'q': ['q1', 'q2'], 'page': ['1']}

from urllib.parse import parse_qsl


>>> parse_qsl('q=q1&q=q2&page=1')
[('q', 'q1'), ('q', 'q2'), ('page', '1')]

For more details, see URL, URI, URN - Linux Cookbook.

urllib.parse: URL Parsing

Recipes for Python. Hands-on code examples, snippets and guides for daily work.

`urllib.parse`: URL Parsing

Recipes

Join (Concatenate) URL

Encode Query String

Decode Query String

More

References