#17403: urllib.robotparser normalizes the URLs before adding them to a RuleLine.

This helps in handling certain types of invalid URLs in a conservative manner.
This commit is contained in:
Senthil Kumaran 2013-05-29 05:54:31 -07:00
parent eb4c9c77b8
commit c70a6ae49b
3 changed files with 17 additions and 0 deletions

View file

@ -157,6 +157,7 @@ class RuleLine:
if path == '' and not allowance:
# an empty value means allow all
allowance = True
path = urllib.parse.urlunparse(urllib.parse.urlparse(path))
self.path = urllib.parse.quote(path)
self.allowance = allowance