bpo-31325: Fix usage of namedtuple in RobotFileParser.parse() (#4529)

This commit is contained in:
Berker Peksag 2017-11-24 02:40:26 +03:00 committed by Raymond Hettinger
parent 0858495a50
commit 3df02dbc8e
4 changed files with 19 additions and 12 deletions

View file

@@ -3,7 +3,6 @@ import os
import threading
import unittest
import urllib.robotparser
-from collections import namedtuple
from test import support
from http.server import BaseHTTPRequestHandler, HTTPServer
@@ -87,6 +86,10 @@ class BaseRequestRateTest(BaseRobotTest):
self.parser.crawl_delay(agent), self.crawl_delay
)
if self.request_rate:
+self.assertIsInstance(
+self.parser.request_rate(agent),
+urllib.robotparser.RequestRate
+)
self.assertEqual(
self.parser.request_rate(agent).requests,
self.request_rate.requests
@@ -108,7 +111,7 @@ Disallow: /a%2fb.html
Disallow: /%7ejoe/index.html
"""
agent = 'figtree'
-request_rate = namedtuple('req_rate', 'requests seconds')(9, 30)
+request_rate = urllib.robotparser.RequestRate(9, 30)
crawl_delay = 3
good = [('figtree', '/foo.html')]
bad = ['/tmp', '/tmp.html', '/tmp/a.html', '/a%3cd.html', '/a%3Cd.html',
@@ -237,7 +240,7 @@ Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/
"""
-request_rate = namedtuple('req_rate', 'requests seconds')(3, 15)
+request_rate = urllib.robotparser.RequestRate(3, 15)
crawl_delay = 1
good = ['/', '/test.html']
bad = ['/cyberworld/map/index.html']