gh-66897: Upgrade HTTP CONNECT to protocol HTTP/1.1 (#8305)

* bpo-22708: Upgrade HTTP CONNECT to protocol HTTP/1.1 (GH-NNNN)

Use protocol HTTP/1.1 when sending HTTP CONNECT tunnelling requests;
generate Host: headers if one is not already provided (required by
HTTP/1.1), convert IDN domains to punycode in HTTP CONNECT requests.

* Refactor tests to pass under -bb (fix ByteWarnings); missed some lines >80.

* Use consistent 'tunnelling' spelling in Lib/http/client.py

* Lib/test/test_httplib: Remove remnant of obsoleted test.

* Use dict.copy() not copy.copy()

* fix version changed

* Update Lib/http/client.py

Co-authored-by: bgehman <bgehman@users.noreply.github.com>

* Switch to for/else: syntax, as suggested

* Don't use for: else:

* Sure, fine, w/e

* Oops

* 1nm to the left

---------

Co-authored-by: Éric <merwok@netwok.org>
Co-authored-by: bgehman <bgehman@users.noreply.github.com>
Co-authored-by: Oleg Iarygin <oleg@arhadthedev.net>
This commit is contained in:
Michael Handler 2023-04-04 21:55:24 -07:00 committed by GitHub
parent a62ff97075
commit 1a8f862e32
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 167 additions and 21 deletions

View file

@ -353,6 +353,13 @@ HTTPConnection Objects
The *headers* argument should be a mapping of extra HTTP headers to send with The *headers* argument should be a mapping of extra HTTP headers to send with
the CONNECT request. the CONNECT request.
As HTTP/1.1 is used for HTTP CONNECT tunnelling request, `as per the RFC
<https://tools.ietf.org/html/rfc7231#section-4.3.6>`_, a HTTP ``Host:``
header must be provided, matching the authority-form of the request target
provided as the destination for the CONNECT request. If a HTTP ``Host:``
header is not provided via the headers argument, one is generated and
transmitted automatically.
For example, to tunnel through a HTTPS proxy server running locally on port For example, to tunnel through a HTTPS proxy server running locally on port
8080, we would pass the address of the proxy to the :class:`HTTPSConnection` 8080, we would pass the address of the proxy to the :class:`HTTPSConnection`
constructor, and the address of the host that we eventually want to reach to constructor, and the address of the host that we eventually want to reach to
@ -365,6 +372,11 @@ HTTPConnection Objects
.. versionadded:: 3.2 .. versionadded:: 3.2
.. versionchanged:: 3.12
HTTP CONNECT tunnelling requests use protocol HTTP/1.1, upgraded from
protocol HTTP/1.0. ``Host:`` HTTP headers are mandatory for HTTP/1.1, so
one will be automatically generated and transmitted if not provided in
the headers argument.
.. method:: HTTPConnection.connect() .. method:: HTTPConnection.connect()

View file

@ -870,9 +870,9 @@ class HTTPConnection:
def set_tunnel(self, host, port=None, headers=None): def set_tunnel(self, host, port=None, headers=None):
"""Set up host and port for HTTP CONNECT tunnelling. """Set up host and port for HTTP CONNECT tunnelling.
In a connection that uses HTTP CONNECT tunneling, the host passed to the In a connection that uses HTTP CONNECT tunnelling, the host passed to
constructor is used as a proxy server that relays all communication to the constructor is used as a proxy server that relays all communication
the endpoint passed to `set_tunnel`. This done by sending an HTTP to the endpoint passed to `set_tunnel`. This done by sending an HTTP
CONNECT request to the proxy server when the connection is established. CONNECT request to the proxy server when the connection is established.
This method must be called before the HTTP connection has been This method must be called before the HTTP connection has been
@ -880,6 +880,13 @@ class HTTPConnection:
The headers argument should be a mapping of extra HTTP headers to send The headers argument should be a mapping of extra HTTP headers to send
with the CONNECT request. with the CONNECT request.
As HTTP/1.1 is used for HTTP CONNECT tunnelling request, as per the RFC
(https://tools.ietf.org/html/rfc7231#section-4.3.6), a HTTP Host:
header must be provided, matching the authority-form of the request
target provided as the destination for the CONNECT request. If a
HTTP Host: header is not provided via the headers argument, one
is generated and transmitted automatically.
""" """
if self.sock: if self.sock:
@ -887,10 +894,15 @@ class HTTPConnection:
self._tunnel_host, self._tunnel_port = self._get_hostport(host, port) self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
if headers: if headers:
self._tunnel_headers = headers self._tunnel_headers = headers.copy()
else: else:
self._tunnel_headers.clear() self._tunnel_headers.clear()
if not any(header.lower() == "host" for header in self._tunnel_headers):
encoded_host = self._tunnel_host.encode("idna").decode("ascii")
self._tunnel_headers["Host"] = "%s:%d" % (
encoded_host, self._tunnel_port)
def _get_hostport(self, host, port): def _get_hostport(self, host, port):
if port is None: if port is None:
i = host.rfind(':') i = host.rfind(':')
@ -915,8 +927,9 @@ class HTTPConnection:
self.debuglevel = level self.debuglevel = level
def _tunnel(self): def _tunnel(self):
connect = b"CONNECT %s:%d HTTP/1.0\r\n" % ( connect = b"CONNECT %s:%d %s\r\n" % (
self._tunnel_host.encode("ascii"), self._tunnel_port) self._tunnel_host.encode("idna"), self._tunnel_port,
self._http_vsn_str.encode("ascii"))
headers = [connect] headers = [connect]
for header, value in self._tunnel_headers.items(): for header, value in self._tunnel_headers.items():
headers.append(f"{header}: {value}\r\n".encode("latin-1")) headers.append(f"{header}: {value}\r\n".encode("latin-1"))

View file

@ -2187,11 +2187,12 @@ class HTTPResponseTest(TestCase):
class TunnelTests(TestCase): class TunnelTests(TestCase):
def setUp(self): def setUp(self):
response_text = ( response_text = (
'HTTP/1.0 200 OK\r\n\r\n' # Reply to CONNECT 'HTTP/1.1 200 OK\r\n\r\n' # Reply to CONNECT
'HTTP/1.1 200 OK\r\n' # Reply to HEAD 'HTTP/1.1 200 OK\r\n' # Reply to HEAD
'Content-Length: 42\r\n\r\n' 'Content-Length: 42\r\n\r\n'
) )
self.host = 'proxy.com' self.host = 'proxy.com'
self.port = client.HTTP_PORT
self.conn = client.HTTPConnection(self.host) self.conn = client.HTTPConnection(self.host)
self.conn._create_connection = self._create_connection(response_text) self.conn._create_connection = self._create_connection(response_text)
@ -2203,15 +2204,45 @@ class TunnelTests(TestCase):
return FakeSocket(response_text, host=address[0], port=address[1]) return FakeSocket(response_text, host=address[0], port=address[1])
return create_connection return create_connection
def test_set_tunnel_host_port_headers(self): def test_set_tunnel_host_port_headers_add_host_missing(self):
tunnel_host = 'destination.com' tunnel_host = 'destination.com'
tunnel_port = 8888 tunnel_port = 8888
tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)'} tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)'}
tunnel_headers_after = tunnel_headers.copy()
tunnel_headers_after['Host'] = '%s:%d' % (tunnel_host, tunnel_port)
self.conn.set_tunnel(tunnel_host, port=tunnel_port, self.conn.set_tunnel(tunnel_host, port=tunnel_port,
headers=tunnel_headers) headers=tunnel_headers)
self.conn.request('HEAD', '/', '') self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host) self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, client.HTTP_PORT) self.assertEqual(self.conn.sock.port, self.port)
self.assertEqual(self.conn._tunnel_host, tunnel_host)
self.assertEqual(self.conn._tunnel_port, tunnel_port)
self.assertEqual(self.conn._tunnel_headers, tunnel_headers_after)
def test_set_tunnel_host_port_headers_set_host_identical(self):
tunnel_host = 'destination.com'
tunnel_port = 8888
tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)',
'Host': '%s:%d' % (tunnel_host, tunnel_port)}
self.conn.set_tunnel(tunnel_host, port=tunnel_port,
headers=tunnel_headers)
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertEqual(self.conn._tunnel_host, tunnel_host)
self.assertEqual(self.conn._tunnel_port, tunnel_port)
self.assertEqual(self.conn._tunnel_headers, tunnel_headers)
def test_set_tunnel_host_port_headers_set_host_different(self):
tunnel_host = 'destination.com'
tunnel_port = 8888
tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)',
'Host': '%s:%d' % ('example.com', 4200)}
self.conn.set_tunnel(tunnel_host, port=tunnel_port,
headers=tunnel_headers)
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertEqual(self.conn._tunnel_host, tunnel_host) self.assertEqual(self.conn._tunnel_host, tunnel_host)
self.assertEqual(self.conn._tunnel_port, tunnel_port) self.assertEqual(self.conn._tunnel_port, tunnel_port)
self.assertEqual(self.conn._tunnel_headers, tunnel_headers) self.assertEqual(self.conn._tunnel_headers, tunnel_headers)
@ -2223,17 +2254,96 @@ class TunnelTests(TestCase):
'destination.com') 'destination.com')
def test_connect_with_tunnel(self): def test_connect_with_tunnel(self):
self.conn.set_tunnel('destination.com') d = {
b'host': b'destination.com',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'))
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)
def test_connect_with_tunnel_with_default_port(self):
d = {
b'host': b'destination.com',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'), port=d[b'port'])
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)
def test_connect_with_tunnel_with_nonstandard_port(self):
d = {
b'host': b'destination.com',
b'port': 8888,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'), port=d[b'port'])
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s:%(port)d\r\n' % d,
self.conn.sock.data)
# This request is not RFC-valid, but it's been possible with the library
# for years, so don't break it unexpectedly... This also tests
# case-insensitivity when injecting Host: headers if they're missing.
def test_connect_with_tunnel_with_different_host_header(self):
d = {
b'host': b'destination.com',
b'tunnel_host_header': b'example.com:9876',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(
d[b'host'].decode('ascii'),
headers={'HOST': d[b'tunnel_host_header'].decode('ascii')})
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'HOST: %(tunnel_host_header)s\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)
def test_connect_with_tunnel_different_host(self):
d = {
b'host': b'destination.com',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'))
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)
def test_connect_with_tunnel_idna(self):
dest = '\u03b4\u03c0\u03b8.gr'
dest_port = b'%s:%d' % (dest.encode('idna'), client.HTTP_PORT)
expected = b'CONNECT %s HTTP/1.1\r\nHost: %s\r\n\r\n' % (
dest_port, dest_port)
self.conn.set_tunnel(dest)
self.conn.request('HEAD', '/', '') self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host) self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, client.HTTP_PORT) self.assertEqual(self.conn.sock.port, client.HTTP_PORT)
self.assertIn(b'CONNECT destination.com', self.conn.sock.data) self.assertIn(expected, self.conn.sock.data)
# issue22095
self.assertNotIn(b'Host: destination.com:None', self.conn.sock.data)
self.assertIn(b'Host: destination.com', self.conn.sock.data)
# This test should be removed when CONNECT gets the HTTP/1.1 blessing
self.assertNotIn(b'Host: proxy.com', self.conn.sock.data)
def test_tunnel_connect_single_send_connection_setup(self): def test_tunnel_connect_single_send_connection_setup(self):
"""Regresstion test for https://bugs.python.org/issue43332.""" """Regresstion test for https://bugs.python.org/issue43332."""
@ -2253,12 +2363,19 @@ class TunnelTests(TestCase):
msg=f'unexpected proxy data sent {proxy_setup_data_sent!r}') msg=f'unexpected proxy data sent {proxy_setup_data_sent!r}')
def test_connect_put_request(self): def test_connect_put_request(self):
self.conn.set_tunnel('destination.com') d = {
b'host': b'destination.com',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'))
self.conn.request('PUT', '/', '') self.conn.request('PUT', '/', '')
self.assertEqual(self.conn.sock.host, self.host) self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, client.HTTP_PORT) self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT destination.com', self.conn.sock.data) self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
self.assertIn(b'Host: destination.com', self.conn.sock.data) b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'PUT / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)
def test_tunnel_debuglog(self): def test_tunnel_debuglog(self):
expected_header = 'X-Dummy: 1' expected_header = 'X-Dummy: 1'

View file

@ -693,6 +693,7 @@ Anders Hammarquist
Mark Hammond Mark Hammond
Harald Hanche-Olsen Harald Hanche-Olsen
Manus Hand Manus Hand
Michael Handler
Andreas Hangauer Andreas Hangauer
Milton L. Hankins Milton L. Hankins
Carl Bordum Hansen Carl Bordum Hansen

View file

@ -0,0 +1,3 @@
http.client CONNECT method tunnel improvements: Use HTTP 1.1 protocol; send
a matching Host: header with CONNECT, if one is not provided; convert IDN
domain names to Punycode. Patch by Michael Handler.