Python 3 Start
This commit is contained in:
parent
9f029c8ceb
commit
141fe8d80c
67 changed files with 264 additions and 274 deletions
|
@ -25,8 +25,8 @@ __license__ = "Python"
|
|||
|
||||
import re
|
||||
import unicodedata
|
||||
import urlparse
|
||||
from urllib import quote, unquote
|
||||
import urllib.parse
|
||||
from urllib.parse import quote, unquote
|
||||
|
||||
default_port = {
|
||||
'http': 80,
|
||||
|
@ -52,7 +52,7 @@ normalizers = (Normalizer(re.compile(
|
|||
def normalize(url, assume_scheme=False):
|
||||
"""Normalize a URL."""
|
||||
|
||||
scheme, auth, path, query, fragment = urlparse.urlsplit(url.strip())
|
||||
scheme, auth, path, query, fragment = urllib.parse.urlsplit(url.strip())
|
||||
userinfo, host, port = re.search('([^@]*@)?([^:]*):?(.*)', auth).groups()
|
||||
|
||||
# Always provide the URI scheme in lowercase characters.
|
||||
|
@ -78,7 +78,7 @@ def normalize(url, assume_scheme=False):
|
|||
# Always use uppercase A-through-F characters when percent-encoding.
|
||||
# All portions of the URI must be utf-8 encoded NFC from Unicode strings
|
||||
def clean(string):
    """Percent-decode *string* and return it as UTF-8 encoded NFC bytes.

    Percent-escapes are decoded as UTF-8; byte sequences that are not
    valid UTF-8 become U+FFFD instead of raising.  The decoded text is
    normalized to Unicode NFC and re-encoded as UTF-8 so the caller can
    hand it to ``quote`` for canonical percent-encoding.
    """
    # On Python 3 ``unquote`` already returns ``str``; wrapping that in
    # ``str(value, 'utf-8', 'replace')`` raises "decoding str is not
    # supported", so the decoding policy is passed to ``unquote`` itself.
    string = unquote(string, encoding='utf-8', errors='replace')
    return unicodedata.normalize('NFC', string).encode('utf-8')
|
||||
|
||||
path = quote(clean(path), "~:/?#[]@!$&'()*+,;=")
|
||||
|
@ -118,7 +118,7 @@ def normalize(url, assume_scheme=False):
|
|||
|
||||
# For schemes that define a port, use an empty port if the default is
|
||||
# desired
|
||||
if port and scheme in default_port.keys():
|
||||
if port and scheme in list(default_port.keys()):
|
||||
if port.isdigit():
|
||||
port = str(int(port))
|
||||
if int(port) == default_port[scheme]:
|
||||
|
@ -130,7 +130,7 @@ def normalize(url, assume_scheme=False):
|
|||
auth += ":" + port
|
||||
if url.endswith("#") and query == "" and fragment == "":
|
||||
path += "#"
|
||||
normal_url = urlparse.urlunsplit((scheme, auth, path, query,
|
||||
normal_url = urllib.parse.urlunsplit((scheme, auth, path, query,
|
||||
fragment)).replace("http:///", "http://")
|
||||
for norm in normalizers:
|
||||
m = norm.regex.match(normal_url)
|
||||
|
|
Reference in a new issue