Skip to content

Commit d50cbf5

Browse files
committed
Replaced urllib with httpx. Supports HTTP/2, more convenient and secure.
Fixes exception raising.
1 parent 11f7632 commit d50cbf5

File tree

8 files changed

+56
-113
lines changed

8 files changed

+56
-113
lines changed

ipwhois/experimental.py

+11-11
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ def get_bulk_asn_whois(addresses=None, retry_count=3, timeout=120):
112112

113113
def bulk_lookup_rdap(addresses=None, inc_raw=False, retry_count=3, depth=0,
114114
excluded_entities=None, rate_limit_timeout=60,
115-
socket_timeout=10, asn_timeout=240, proxy_openers=None):
115+
socket_timeout=10, asn_timeout=240, http_clients=None):
116116
"""
117117
The function for bulk retrieving and parsing whois information for a list
118118
of IP addresses via HTTP (RDAP). This bulk lookup method uses bulk
@@ -138,8 +138,8 @@ def bulk_lookup_rdap(addresses=None, inc_raw=False, retry_count=3, depth=0,
138138
connections in seconds. Defaults to 10.
139139
asn_timeout (:obj:`int`): The default timeout for bulk ASN lookups in
140140
seconds. Defaults to 240.
141-
proxy_openers (:obj:`list` of :obj:`OpenerDirector`): Proxy openers
142-
for single/rotating proxy support. Defaults to None.
141+
http_clients (:obj:`list` of :obj:`httpx.Client`): httpx clients
142+
for single/rotating proxy and fingerprint support. Defaults to None.
143143
144144
Returns:
145145
namedtuple:
@@ -209,11 +209,11 @@ def bulk_lookup_rdap(addresses=None, inc_raw=False, retry_count=3, depth=0,
209209
}
210210
asn_parsed_results = {}
211211

212-
if proxy_openers is None:
212+
if http_clients is None:
213213

214-
proxy_openers = [None]
214+
http_clients = [None]
215215

216-
proxy_openers_copy = iter(proxy_openers)
216+
http_clients_copy = iter(http_clients)
217217

218218
# Make sure addresses is unique
219219
unique_ip_list = list(unique_everseen(addresses))
@@ -347,19 +347,19 @@ def bulk_lookup_rdap(addresses=None, inc_raw=False, retry_count=3, depth=0,
347347

348348
rate_tracker[rir]['count'] += 1
349349

350-
# Get the next proxy opener to use, or None
350+
# Get the next HTTP client object to use, or None
351351
try:
352352

353-
opener = next(proxy_openers_copy)
353+
client = next(http_clients_copy)
354354

355355
# Start at the beginning if all have been used
356356
except StopIteration:
357357

358-
proxy_openers_copy = iter(proxy_openers)
359-
opener = next(proxy_openers_copy)
358+
http_clients_copy = iter(http_clients)
359+
client = next(http_clients_copy)
360360

361361
# Instantiate the objects needed for the RDAP lookup
362-
net = Net(ip, timeout=socket_timeout, proxy_opener=opener)
362+
net = Net(ip, timeout=socket_timeout, http_client=client)
363363
rdap = RDAP(net)
364364

365365
try:

ipwhois/ipwhois.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,14 @@ class IPWhois:
4040
An IPv4 or IPv6 address
4141
timeout (:obj:`int`): The default timeout for socket connections in
4242
seconds. Defaults to 5.
43-
proxy_opener (:obj:`urllib.request.OpenerDirector`): The request for
44-
proxy support. Defaults to None.
43+
http_client (:obj:`httpx.Client`): The HTTP client object used for
44+
requests; proxies are configured on it. Defaults to None.
4545
"""
4646

47-
def __init__(self, address, timeout=5, proxy_opener=None):
47+
def __init__(self, address, timeout=5, http_client=None):
4848

4949
self.net = Net(
50-
address=address, timeout=timeout, proxy_opener=proxy_opener
50+
address=address, timeout=timeout, http_client=http_client
5151
)
5252
self.ipasn = IPASN(self.net)
5353

@@ -61,7 +61,7 @@ def __init__(self, address, timeout=5, proxy_opener=None):
6161
def __repr__(self):
6262

6363
return 'IPWhois({0}, {1}, {2})'.format(
64-
self.address_str, str(self.timeout), repr(self.net.opener)
64+
self.address_str, str(self.timeout), repr(self.net.http_client)
6565
)
6666

6767
def lookup_whois(self, inc_raw=False, retry_count=3, get_referral=False,

ipwhois/net.py

+17-45
Original file line numberDiff line numberDiff line change
@@ -49,22 +49,11 @@
4949
IPv4Address,
5050
IPv6Address)
5151

52-
try: # pragma: no cover
53-
from urllib.request import (OpenerDirector,
54-
ProxyHandler,
55-
build_opener,
56-
Request,
57-
URLError,
58-
HTTPError)
59-
from urllib.parse import urlencode
60-
except ImportError: # pragma: no cover
61-
from urllib2 import (OpenerDirector,
62-
ProxyHandler,
63-
build_opener,
64-
Request,
65-
URLError,
66-
HTTPError)
67-
from urllib import urlencode
52+
from httpx import (Client,
53+
HTTPStatusError,
54+
TransportError,
55+
InvalidURL)
56+
from urllib.parse import urlencode
6857

6958
log = logging.getLogger(__name__)
7059

@@ -101,15 +90,15 @@ class Net:
10190
An IPv4 or IPv6 address
10291
timeout (:obj:`int`): The default timeout for socket connections in
10392
seconds. Defaults to 5.
104-
proxy_opener (:obj:`urllib.request.OpenerDirector`): The request for
105-
proxy support. Defaults to None.
93+
http_client (:obj:`httpx.Client`): The httpx client, which lets you
94+
customize how this library uses HTTP, including proxies. Defaults to None.
10695
10796
Raises:
10897
IPDefinedError: The address provided is defined (does not need to be
10998
resolved).
11099
"""
111100

112-
def __init__(self, address, timeout=5, proxy_opener=None):
101+
def __init__(self, address, timeout=5, http_client=None):
113102

114103
# IPv4Address or IPv6Address
115104
if isinstance(address, IPv4Address) or isinstance(
@@ -129,15 +118,10 @@ def __init__(self, address, timeout=5, proxy_opener=None):
129118
self.dns_resolver.timeout = timeout
130119
self.dns_resolver.lifetime = timeout
131120

132-
# Proxy opener.
133-
if isinstance(proxy_opener, OpenerDirector):
121+
if not http_client:
122+
http_client = Client()
134123

135-
self.opener = proxy_opener
136-
137-
else:
138-
139-
handler = ProxyHandler()
140-
self.opener = build_opener(handler)
124+
self.http_client = http_client
141125

142126
# IP address in string format for use in queries.
143127
self.address_str = self.address.__str__()
@@ -709,10 +693,10 @@ def get_http_json(self, url=None, retry_count=3, rate_limit_timeout=120,
709693

710694
return d
711695

712-
except HTTPError as e: # pragma: no cover
696+
except HTTPStatusError as e: # pragma: no cover
713697

714698
# RIPE is producing this HTTP error rather than a JSON error.
715-
if e.code == 429:
699+
if e.response.status_code == 429:
716700

717701
log.debug('HTTP query rate limit exceeded.')
718702

@@ -737,7 +721,7 @@ def get_http_json(self, url=None, retry_count=3, rate_limit_timeout=120,
737721
raise HTTPLookupError('HTTP lookup failed for {0} with error '
738722
'code {1}.'.format(url, str(e.code)))
739723

740-
except (URLError, socket.timeout, socket.error) as e:
724+
except (TransportError,) as e:
741725

742726
log.debug('HTTP query socket error: {0}'.format(e))
743727
if retry_count > 0:
@@ -865,22 +849,10 @@ def get_http_raw(self, url=None, retry_count=3, headers=None,
865849
# Create the connection for the HTTP query.
866850
log.debug('HTTP query for {0} at {1}'.format(
867851
self.address_str, url))
868-
try:
869-
# Py 2 inspection alert bypassed by using kwargs dict.
870-
conn = Request(url=url, data=enc_form_data, headers=headers,
871-
**{'method': request_type})
872-
except TypeError: # pragma: no cover
873-
conn = Request(url=url, data=enc_form_data, headers=headers)
874-
data = self.opener.open(conn, timeout=self.timeout)
875-
876-
try:
877-
d = data.readall().decode('ascii', 'ignore')
878-
except AttributeError: # pragma: no cover
879-
d = data.read().decode('ascii', 'ignore')
880-
881-
return str(d)
852+
return self.http_client.request(url=url, data=enc_form_data, headers=headers,
853+
**{'method': request_type}).text
882854

883-
except (URLError, socket.timeout, socket.error) as e:
855+
except (InvalidURL, TransportError) as e:
884856

885857
log.debug('HTTP query socket error: {0}'.format(e))
886858
if retry_count > 0:

ipwhois/scripts/ipwhois_cli.py

+10-19
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,7 @@
3131
from ipwhois.hr import (HR_ASN, HR_RDAP, HR_RDAP_COMMON, HR_WHOIS,
3232
HR_WHOIS_NIR)
3333

34-
try: # pragma: no cover
35-
from urllib.request import (ProxyHandler,
36-
build_opener)
37-
except ImportError: # pragma: no cover
38-
from urllib2 import (ProxyHandler,
39-
build_opener)
34+
from httpx import Client
4035

4136
# CLI ANSI rendering
4237
ANSI = {
@@ -348,18 +343,15 @@ class IPWhoisCLI:
348343
An IPv4 or IPv6 address
349344
timeout (:obj:`int`): The default timeout for socket connections in
350345
seconds. Defaults to 5.
351-
proxy_http (:obj:`urllib.request.OpenerDirector`): The request for
352-
proxy HTTP support or None.
353-
proxy_https (:obj:`urllib.request.OpenerDirector`): The request for
354-
proxy HTTPS support or None.
346+
http_client (:obj:`httpx.Client`): The httpx.Client object.
347+
Proxies and other HTTP settings are configured on it.
355348
"""
356349

357350
def __init__(
358351
self,
359352
addr,
360353
timeout,
361-
proxy_http,
362-
proxy_https
354+
http_client
363355
):
364356

365357
self.addr = addr
@@ -368,29 +360,28 @@ def __init__(
368360
handler_dict = None
369361
if proxy_http is not None:
370362

371-
handler_dict = {'http': proxy_http}
363+
handler_dict = {'http://*': proxy_http}
372364

373365
if proxy_https is not None:
374366

375367
if handler_dict is None:
376368

377-
handler_dict = {'https': proxy_https}
369+
handler_dict = {'https://*': proxy_https}
378370

379371
else:
380372

381-
handler_dict['https'] = proxy_https
373+
handler_dict['https://*'] = proxy_https
382374

383375
if handler_dict is None:
384376

385-
self.opener = None
377+
self.http_client = None
386378
else:
387379

388-
handler = ProxyHandler(handler_dict)
389-
self.opener = build_opener(handler)
380+
self.http_client = Client(proxies=handler_dict)
390381

391382
self.obj = IPWhois(address=self.addr,
392383
timeout=self.timeout,
393-
proxy_opener=self.opener)
384+
http_client=self.http_client)
394385

395386
def generate_output_header(self, query_type='RDAP'):
396387
"""

ipwhois/tests/online/test_experimental.py

+3-11
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from ipwhois.tests import TestCommon
33
from ipwhois.exceptions import (ASNLookupError)
44
from ipwhois.experimental import (get_bulk_asn_whois, bulk_lookup_rdap)
5+
from httpx import Client
56

67
LOG_FORMAT = ('[%(asctime)s] [%(levelname)s] [%(filename)s:%(lineno)s] '
78
'[%(funcName)s()] %(message)s')
@@ -39,18 +40,9 @@ def test_get_bulk_asn_whois(self):
3940

4041
def test_bulk_lookup_rdap(self):
4142

42-
try:
43-
from urllib.request import (OpenerDirector,
44-
ProxyHandler,
45-
build_opener)
46-
except ImportError:
47-
from urllib2 import (OpenerDirector,
48-
ProxyHandler,
49-
build_opener)
43+
from httpx import Client
5044

51-
handler = ProxyHandler()
52-
opener = build_opener(handler)
53-
bulk_lookup_rdap(addresses=['74.125.225.229'], proxy_openers=[opener])
45+
bulk_lookup_rdap(addresses=['74.125.225.229'], http_client=Client())
5446

5547
ips = [
5648
'74.125.225.229', # ARIN

ipwhois/tests/online/test_ipwhois.py

+3-7
Original file line numberDiff line numberDiff line change
@@ -125,10 +125,7 @@ def test_lookup_whois(self):
125125
break
126126

127127
def test_lookup_rdap(self):
128-
try:
129-
from urllib.request import ProxyHandler, build_opener
130-
except ImportError:
131-
from urllib2 import ProxyHandler, build_opener
128+
from httpx import Client
132129

133130
ips = [
134131
'74.125.225.229', # ARIN
@@ -169,8 +166,7 @@ def test_lookup_rdap(self):
169166
except Exception as e:
170167
self.fail('Unexpected exception raised: {0}'.format(e))
171168

172-
handler = ProxyHandler({'http': 'http://0.0.0.0:80/'})
173-
opener = build_opener(handler)
169+
http_client = Client(proxies={'http://*': 'http://0.0.0.0:80/'})
174170
result = IPWhois(address='74.125.225.229', timeout=0,
175-
proxy_opener=opener)
171+
http_client=http_client)
176172
self.assertRaises(ASNRegistryError, result.lookup_rdap)

ipwhois/tests/test_net.py

+6-14
Original file line numberDiff line numberDiff line change
@@ -38,20 +38,12 @@ def test_timeout(self):
3838
result = Net('74.125.225.229')
3939
self.assertIsInstance(result.timeout, int)
4040

41-
def test_proxy_opener(self):
42-
try:
43-
from urllib.request import (OpenerDirector,
44-
ProxyHandler,
45-
build_opener)
46-
except ImportError:
47-
from urllib2 import (OpenerDirector,
48-
ProxyHandler,
49-
build_opener)
41+
def test_http_client(self):
42+
from httpx import Client
5043

5144
result = Net('74.125.225.229')
52-
self.assertIsInstance(result.opener, OpenerDirector)
45+
self.assertIsInstance(result.http_client, Client)
5346

54-
handler = ProxyHandler()
55-
opener = build_opener(handler)
56-
result = Net(address='74.125.225.229', proxy_opener=opener)
57-
self.assertIsInstance(result.opener, OpenerDirector)
47+
client = Client()
48+
result = Net(address='74.125.225.229', http_client=client)
49+
self.assertIsInstance(result.http_client, Client)

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@
6767

6868
PACKAGE_DATA = {'ipwhois': ['data/*.xml', 'data/*.csv']}
6969

70-
INSTALL_REQUIRES = ['dnspython<=2.0.0', 'ipaddr==2.2.0;python_version<"3.3"']
70+
INSTALL_REQUIRES = ['dnspython<=2.0.0', 'ipaddr==2.2.0;python_version<"3.3"', 'httpx']
7171

7272
setup(
7373
name=NAME,

0 commit comments

Comments
 (0)