Issue #25400: RobotFileParser now correctly returns default values for crawl_delay and request_rate

Initial patch by Peter Wirtz.
This commit is contained in:
Berker Peksag 2016-09-18 20:17:58 +03:00
parent 85c98bf968
commit 9a7bbb2e3f
3 changed files with 46 additions and 21 deletions

View file

@@ -175,16 +175,20 @@ class RobotFileParser:
return True
def crawl_delay(self, useragent):
    """Return the crawl delay that applies to *useragent*.

    The ``delay`` of the first entry in ``self.entries`` whose
    ``applies_to(useragent)`` is true wins; otherwise the ``delay`` of
    ``self.default_entry`` is used.

    Returns ``None`` when ``self.mtime()`` is falsy (presumably the
    robots.txt data has not been loaded yet -- confirm against
    ``mtime``), or when no entry matches and there is no default entry.
    """
    if not self.mtime():
        return None
    for entry in self.entries:
        if entry.applies_to(useragent):
            return entry.delay
    # NOTE(review): default_entry is presumably None when no catch-all
    # group was parsed; guard so callers get None instead of an
    # AttributeError.
    if self.default_entry is None:
        return None
    return self.default_entry.delay
def request_rate(self, useragent):
    """Return the request rate that applies to *useragent*.

    The ``req_rate`` of the first entry in ``self.entries`` whose
    ``applies_to(useragent)`` is true wins; otherwise the ``req_rate``
    of ``self.default_entry`` is used.

    Returns ``None`` when ``self.mtime()`` is falsy (presumably the
    robots.txt data has not been loaded yet -- confirm against
    ``mtime``), or when no entry matches and there is no default entry.
    """
    if not self.mtime():
        return None
    for entry in self.entries:
        if entry.applies_to(useragent):
            return entry.req_rate
    # NOTE(review): default_entry is presumably None when no catch-all
    # group was parsed; guard so callers get None instead of an
    # AttributeError.
    if self.default_entry is None:
        return None
    return self.default_entry.req_rate
def __str__(self):
return ''.join([str(entry) + "\n" for entry in self.entries])