From 8337ef7a25a3156392f9f39317fb950f746f619e Mon Sep 17 00:00:00 2001 From: Jiri Hnidek Date: Wed, 24 May 2017 22:24:21 +0200 Subject: [PATCH] bpo-30462: urllib: NO_PROXY env. variable can contain domain with asterisk * proxy_bypass_environment() can now bypass the proxy for a host when the NO_PROXY or no_proxy variable includes a domain specified with an asterisk at the beginning, e.g. *.python.org. * Added and extended unit tests to cover the new functionality. --- Lib/test/test_urllib.py | 23 +++++++++++++++++++---- Lib/urllib/request.py | 2 +- Misc/ACKS | 1 + Misc/NEWS | 2 ++ 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index c292d74f84a93c..3e15c6d8ce56f9 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -228,10 +228,11 @@ def test_getproxies_environment_keep_no_proxies(self): # getproxies_environment use lowered case truncated (no '_proxy') keys self.assertEqual('localhost', proxies['no']) # List of no_proxies with space. 
- self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234') + self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234, *.foo.com') self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com')) self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com:8888')) self.assertTrue(urllib.request.proxy_bypass_environment('newdomain.com:1234')) + self.assertTrue(urllib.request.proxy_bypass_environment('bar.foo.com')) def test_proxy_cgi_ignore(self): try: @@ -248,7 +249,7 @@ def test_proxy_cgi_ignore(self): def test_proxy_bypass_environment_host_match(self): bypass = urllib.request.proxy_bypass_environment self.env.set('NO_PROXY', - 'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t') + 'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t, *.example.com') self.assertTrue(bypass('localhost')) self.assertTrue(bypass('LocalHost')) # MixedCase self.assertTrue(bypass('LOCALHOST')) # UPPERCASE @@ -256,10 +257,22 @@ def test_proxy_bypass_environment_host_match(self): self.assertTrue(bypass('foo.d.o.t')) # issue 29142 self.assertTrue(bypass('anotherdomain.com:8888')) self.assertTrue(bypass('www.newdomain.com:1234')) + self.assertTrue(bypass('foo.example.com')) self.assertFalse(bypass('prelocalhost')) self.assertFalse(bypass('newdomain.com')) # no port self.assertFalse(bypass('newdomain.com:1235')) # wrong port + def test_proxy_bypass_envirnoment_one_asterisk(self): + bypass = urllib.request.proxy_bypass_environment + self.env.set('NO_PROXY', '*') + self.assertTrue(bypass('localhost')) + self.assertTrue(bypass('LocalHost')) # MixedCase + self.assertTrue(bypass('LOCALHOST')) + self.assertTrue(bypass('newdomain.com:1234')) + self.assertTrue(bypass('foo.d.o.t')) + self.assertTrue(bypass('anotherdomain.com:8888')) + self.assertTrue(bypass('www.newdomain.com:1234')) + class ProxyTests_withOrderedEnv(unittest.TestCase): @@ -282,12 +295,14 @@ def test_getproxies_environment_prefer_lowercase(self): 
os.environ['HTTP_PROXY'] = 'http://somewhere:3128' proxies = urllib.request.getproxies_environment() self.assertEqual({}, proxies) - # Test lowercase preference of proxy bypass and correct matching including ports - os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234' + # Test lowercase preference of proxy bypass, correct matching including ports + # and asterisk at beginning + os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234, *.foo.com' os.environ['No_Proxy'] = 'xyz.com' self.assertTrue(urllib.request.proxy_bypass_environment('localhost')) self.assertTrue(urllib.request.proxy_bypass_environment('noproxy.com:5678')) self.assertTrue(urllib.request.proxy_bypass_environment('my.proxy:1234')) + self.assertTrue(urllib.request.proxy_bypass_environment('bar.foo.com')) self.assertFalse(urllib.request.proxy_bypass_environment('my.proxy')) self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary')) # Test lowercase preference with replacement diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index a192d527d8bc9a..1640953f56edee 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -2507,7 +2507,7 @@ def proxy_bypass_environment(host, proxies=None): no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')] for name in no_proxy_list: if name: - name = name.lstrip('.') # ignore leading dots + name = name.lstrip('*.') # ignore leading dots and asterisks name = re.escape(name) pattern = r'(.+\.)?%s$' % name if (re.match(pattern, hostonly, re.I) diff --git a/Misc/ACKS b/Misc/ACKS index b72c40c3330bc2..a18f0007692bcd 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1749,3 +1749,4 @@ evilzero Dhushyanth Ramasamy Subhendu Ghosh Sanjay Sundaresan +Jiri Hnidek diff --git a/Misc/NEWS b/Misc/NEWS index 69602384882457..e2325199bbee6e 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,8 @@ What's New in Python 3.7.0 alpha 1? Core and Builtins ----------------- +- bpo-30462: urllib: NO_PROXY env. 
variable can contain domain with asterisk + - bpo-27945: Fixed various segfaults with dict when input collections are mutated during searching, inserting or comparing. Based on patches by Duane Griffin and Tim Mitchell.