Skip to content

Commit ff847d1

Browse files
miss-islington (with rhettinger)
authored and committed
bpo-31325: Fix usage of namedtuple in RobotFileParser.parse() (GH-4529) (#4533)
(cherry picked from commit 3df02db)
1 parent a645b23 commit ff847d1

File tree

4 files changed

+19
-12
lines changed

4 files changed

+19
-12
lines changed

Doc/library/urllib.robotparser.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,10 @@ structure of :file:`robots.txt` files, see http://www.robotstxt.org/orig.html.
6969
.. method:: request_rate(useragent)
7070

7171
Returns the contents of the ``Request-rate`` parameter from
72-
``robots.txt`` in the form of a :func:`~collections.namedtuple`
73-
``(requests, seconds)``. If there is no such parameter or it doesn't
74-
apply to the *useragent* specified or the ``robots.txt`` entry for this
75-
parameter has invalid syntax, return ``None``.
72+
``robots.txt`` as a :term:`named tuple` ``RequestRate(requests, seconds)``.
73+
If there is no such parameter or it doesn't apply to the *useragent*
74+
specified or the ``robots.txt`` entry for this parameter has invalid
75+
syntax, return ``None``.
7676

7777
.. versionadded:: 3.6
7878

Lib/test/test_robotparser.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import os
33
import unittest
44
import urllib.robotparser
5-
from collections import namedtuple
65
from test import support
76
from http.server import BaseHTTPRequestHandler, HTTPServer
87
try:
@@ -90,6 +89,10 @@ def test_request_rate(self):
9089
self.parser.crawl_delay(agent), self.crawl_delay
9190
)
9291
if self.request_rate:
92+
self.assertIsInstance(
93+
self.parser.request_rate(agent),
94+
urllib.robotparser.RequestRate
95+
)
9396
self.assertEqual(
9497
self.parser.request_rate(agent).requests,
9598
self.request_rate.requests
@@ -111,7 +114,7 @@ class CrawlDelayAndRequestRateTest(BaseRequestRateTest, unittest.TestCase):
111114
Disallow: /%7ejoe/index.html
112115
"""
113116
agent = 'figtree'
114-
request_rate = namedtuple('req_rate', 'requests seconds')(9, 30)
117+
request_rate = urllib.robotparser.RequestRate(9, 30)
115118
crawl_delay = 3
116119
good = [('figtree', '/foo.html')]
117120
bad = ['/tmp', '/tmp.html', '/tmp/a.html', '/a%3cd.html', '/a%3Cd.html',
@@ -240,7 +243,7 @@ class DefaultEntryTest(BaseRequestRateTest, unittest.TestCase):
240243
Request-rate: 3/15
241244
Disallow: /cyberworld/map/
242245
"""
243-
request_rate = namedtuple('req_rate', 'requests seconds')(3, 15)
246+
request_rate = urllib.robotparser.RequestRate(3, 15)
244247
crawl_delay = 1
245248
good = ['/', '/test.html']
246249
bad = ['/cyberworld/map/index.html']

Lib/urllib/robotparser.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616

1717
__all__ = ["RobotFileParser"]
1818

19+
RequestRate = collections.namedtuple("RequestRate", "requests seconds")
20+
21+
1922
class RobotFileParser:
2023
""" This class provides a set of methods to read, parse and answer
2124
questions about a single robots.txt file.
@@ -136,11 +139,7 @@ def parse(self, lines):
136139
# check if all values are sane
137140
if (len(numbers) == 2 and numbers[0].strip().isdigit()
138141
and numbers[1].strip().isdigit()):
139-
req_rate = collections.namedtuple('req_rate',
140-
'requests seconds')
141-
entry.req_rate = req_rate
142-
entry.req_rate.requests = int(numbers[0])
143-
entry.req_rate.seconds = int(numbers[1])
142+
entry.req_rate = RequestRate(int(numbers[0]), int(numbers[1]))
144143
state = 2
145144
if state == 2:
146145
self._add_entry(entry)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fix wrong usage of :func:`collections.namedtuple` in
2+
the :meth:`RobotFileParser.parse() <urllib.robotparser.RobotFileParser.parse>`
3+
method.
4+
5+
Initial patch by Robin Wellner.

0 commit comments

Comments (0)