-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathScrap_WhoIS.py
62 lines (47 loc) · 1.52 KB
/
Scrap_WhoIS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
"""Fetch WHOIS results from whois.com and parse them into a dictionary."""
import scraptools
import re
import pprint
def get_whois_html(url):
    """Download the whois.com lookup page for a domain.

    Args:
        url: Domain name or URL to look up. A leading ``http://`` or
            ``https://`` scheme, optionally followed by ``www.``, is
            removed before the query is built.

    Returns:
        Whatever ``scraptools.getUrlContent`` returns for the lookup URL
        (presumably the page markup as text -- confirm against scraptools).
    """
    base_url = 'http://www.whois.com/whois/'
    # Strip the scheme whether or not "www." follows it; otherwise an input
    # such as "http://example.com" would be embedded verbatim in the lookup
    # URL. A bare "www.example.com" is deliberately left untouched.
    query = re.sub(r'^https?://(?:www\.)?', '', url.strip(),
                   flags=re.IGNORECASE)
    return scraptools.getUrlContent(base_url + query)
def get_whois_result(html):
    """Extract the raw WHOIS text from a lookup page.

    Args:
        html: Page markup as a string.

    Returns:
        The captured contents of every ``whois_result`` element, joined
        with newlines, with ``<br>`` turned into line breaks and
        non-breaking spaces turned into ordinary spaces. An empty string
        when nothing matches.
    """
    # "." does not cross newlines here, so each match (opening tag through
    # the closing </div>) has to sit on a single line of the markup.
    blocks = re.findall(r'whois_result.*?>(.*?)</div>', html)
    text = '\n'.join(blocks).replace('<br>', '\n')
    # Non-breaking spaces can arrive as the HTML entity or as the raw
    # character; normalise both so "key: value" separators are plain spaces.
    return text.replace('&nbsp;', ' ').replace('\xa0', ' ')
def result_to_dict(result):
    """Parse WHOIS text into a (at most two-level) dictionary.

    Each line is interpreted as one of:

    * ``Header:`` -- a line ending in a colon with no ``": "`` separator
      opens a new nested dict stored under the header name.
    * ``key: value`` with no surrounding whitespace on the key -- stored at
      the top level.
    * ``  key: value`` where the key carries surrounding whitespace --
      stored in the most recently opened nested dict.

    Any other line is ignored.

    Args:
        result: WHOIS text, one entry per line.

    Returns:
        dict mapping keys to string values or to nested dicts of strings.
    """
    parsed = {}
    section = None  # nested dict opened by the latest "Header:" line
    for line in result.splitlines():
        # Split on the first ": " only, so values that themselves contain
        # ": " are kept whole instead of the line being dropped.
        raw_key, sep, raw_value = line.partition(': ')
        if not sep:
            if line.endswith(':'):
                section = {}
                parsed[line.strip(':')] = section
            continue
        key = raw_key.strip()
        value = raw_value.strip()
        # A key that needed stripping is treated as belonging to the open
        # section. With no section open yet there is nowhere to nest it, so
        # it is stored at the top level rather than raising a TypeError.
        if key == raw_key or section is None:
            parsed[key] = value
        else:
            section[key] = value
    return parsed
def test_whois(use_cache):
    """Run the fetch/parse pipeline for polymtl.ca and pretty-print it.

    Args:
        use_cache: When truthy, read the page from ``result.txt`` instead
            of fetching it; otherwise fetch the page and overwrite
            ``result.txt`` with it.
    """
    cache_path = 'result.txt'
    if use_cache:
        # Context manager so the handle is closed deterministically instead
        # of being left for the garbage collector.
        with open(cache_path) as f:
            html = f.read()
    else:
        html = get_whois_html('polymtl.ca')
        with open(cache_path, 'w') as f:
            f.write(html)
    result = get_whois_result(html)
    infos = result_to_dict(result)
    pprint.pprint(infos)
def main(domain='polymtl.ca'):
    """Look up *domain*, parse the WHOIS record and pretty-print it.

    Args:
        domain: Domain name (or URL) to query. Defaults to ``polymtl.ca``,
            so calling ``main()`` with no arguments behaves as before.
    """
    html = get_whois_html(domain)
    infos = result_to_dict(get_whois_result(html))
    pprint.pprint(infos)
# Run the demo lookup only when executed as a script, not when imported.
if __name__ == '__main__':
    main()