bitsigtest.py
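"""Fetch the first 15 listing pages from https://bitsig.io/, extract the text
of each <pre> block inside the phrasecol table cell, and print the SHA-256
hex digest of every block found."""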
import time  # Only needed if the optional throttle below is enabled
import urllib.request
import hashlib
import socket

# Abort any network operation that takes longer than 5 seconds.
timeout = 5
socket.setdefaulttimeout(timeout)


def sha256hex(s):
    # hashlib.sha256() requires bytes, so encode str input before hashing.
    if isinstance(s, str):
        s = s.encode("utf-8")
    return hashlib.sha256(s).hexdigest()
def download_page(url):
    # Fetch one page and return its HTML as text, or None on any failure.
    # urllib.request exists only on Python 3, so a separate Python 2 code
    # path is not needed.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
    }
    try:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req) as resp:
            # Decode to str so the substring searches below operate on real
            # HTML rather than on the repr of a bytes object.
            return resp.read().decode("utf-8", errors="replace")
    except Exception:
        return None
def _get_next_item(s):
    # Find the next "<td id='phrasecol'>" cell and return the text of the
    # <pre> block inside it, plus the offset where that block ends.
    # Returns ("no_links", ...) when no further block exists.
    ltag = len("<pre>")
    start_line = s.find("<td id='phrasecol'>")
    start_content = s.find("<pre>", start_line)
    end_content = s.find("</pre>", start_content)
    if start_line == -1 or start_content == -1 or end_content == -1:
        return "no_links", end_content
    # Trim the <pre> tag at the start and the final character before </pre>.
    content_raw = s[start_content + ltag : end_content - 1]
    return content_raw, end_content
def _get_all_items(page):
    items = []
    while True:
        item, end_content = _get_next_item(page)
        if item == "no_links":
            break
        items.append(item)  # Collect every extracted <pre> block
        # time.sleep(0.1)  # Optional throttle between extractions
        page = page[end_content:]  # Continue scanning past this block
        if len(item) > 10000:
            break  # Safety valve against a runaway match
    return items
for i in range(0, 15):
    url = "https://bitsig.io/?page=%d" % i
    raw_html = download_page(url)
    if raw_html is None:
        continue  # Skip pages that failed to download instead of crashing
    items = _get_all_items(raw_html)
    for item in items:
        # Normalise line endings before hashing so the digests are stable.
        item = item.replace("\n\r", "\r")
        print(sha256hex(item))