urllib.parse: URL Parsing
Recipes
from urllib.parse import urlparse, urlsplit, urldefrag
url = 'scheme://netloc:80/path;parameters?query=value#fragment'
>>> r = urlparse(url)
>>> r
ParseResult(scheme='scheme', netloc='netloc:80', path='/path;parameters', params='', query='query=value', fragment='fragment')
>>> assert r.scheme == 'scheme'
>>> assert r.netloc == 'netloc:80'
>>> assert r.hostname == 'netloc'
>>> assert r.port == 80
>>> assert r.path == '/path;parameters'
>>> assert r.params == '' # '' here: urlparse() only splits params for schemes known to use them (e.g. http, ftp)
>>> assert r.query == 'query=value'
>>> assert r.fragment == 'fragment'
>>> assert r.geturl() == url
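# `urlsplit()` is similar to `urlparse()`, but does not split the params from the URL.
# It should generally be used instead of `urlparse()` if the more recent URL syntax
# allowing parameters to be applied to each segment of the path portion of the URL (see RFC 2396) is wanted.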
>>> r = urlsplit(url)
>>> r
SplitResult(scheme='scheme', netloc='netloc:80', path='/path;parameters', query='query=value', fragment='fragment')
>>> assert r.scheme == 'scheme'
>>> assert r.netloc == 'netloc:80'
>>> assert r.hostname == 'netloc'
>>> assert r.port == 80
>>> assert r.path == '/path;parameters'
>>> assert r.query == 'query=value'
>>> assert r.fragment == 'fragment'
>>> assert r.geturl() == url
>>> r = urldefrag(url)
>>> r
DefragResult(url='scheme://netloc:80/path;parameters?query=value', fragment='fragment')
>>> assert r.url == 'scheme://netloc:80/path;parameters?query=value'
>>> assert r.fragment == 'fragment'
Join (Concatenate) URL
from urllib.parse import urljoin
>>> url0 = 'https://www.com/path/x.html'
>>> urljoin(url0, 'y.html')
'https://www.com/path/y.html'
>>> url1 = 'https://www.com/path/'
>>> urljoin(url1, '/subpath/y.html')
'https://www.com/subpath/y.html'
>>> urljoin(url1, 'subpath/y.html')
'https://www.com/path/subpath/y.html'
Encode Query String
from urllib.parse import urlencode
q1 = {
'q': 'query string',
'page': 1,
}
q2 = {
'q': ['q1', 'q2'],
'page': 1,
}
>>> urlencode(q1)
'q=query+string&page=1'
>>> urlencode(q2, doseq=True)
'q=q1&q=q2&page=1'
Decode Query String
from urllib.parse import parse_qs
>>> parse_qs('q=query+string&page=1')
{'q': ['query string'], 'page': ['1']}
>>> parse_qs('q=q1&q=q2&page=1')
{'q': ['q1', 'q2'], 'page': ['1']}
from urllib.parse import parse_qsl
>>> parse_qsl('q=q1&q=q2&page=1')
[('q', 'q1'), ('q', 'q2'), ('page', '1')]
More
For more details, see URL, URI, URN - Linux Cookbook.