Skip to content

Commit a88567f

Browse files
committed
Initial commit
0 parents  commit a88567f

File tree

5 files changed

+308
-0
lines changed

5 files changed

+308
-0
lines changed

Config.py

+56
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,56 @@
1+
DBName = "PROXIES.db" # SQLite database file name
2+
TabelName = "IPPORT" # name of the table holding the proxies
3+
TestTimeOut = 10 # timeout passed to requests when checking whether a proxy is usable
4+
TestUrl = "https://www.baidu.com/" # site requested through a proxy to test it
5+
# Pool of User-Agent header values
6+
UserAgents = [
7+
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36",
8+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
9+
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36",
10+
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
11+
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
12+
"Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
13+
"Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
14+
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
15+
"Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
16+
"Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
17+
"Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
18+
"Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
19+
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
20+
"Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
21+
"Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
22+
"Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
23+
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
24+
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
25+
"Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
26+
"Mozilla/5.0 (Windows; U; Windows NT 5.2) Gecko/2008070208 Firefox/3.0.1",
27+
"Mozilla/5.0 (Windows; U; Windows NT 5.1) Gecko/20070309 Firefox/2.0.0.3",
28+
"Mozilla/5.0 (Windows; U; Windows NT 5.1) Gecko/20070803 Firefox/1.5.0.12",
29+
"Opera/9.27 (Windows NT 5.2; U; zh-cn)",
30+
"Mozilla/5.0 (Windows; U; Windows NT 5.2) AppleWebKit/525.13 (KHTML, like Gecko) Version/3.1 Safari/525.13",
31+
"Mozilla/5.0 (iPhone; U; CPU like Mac OS X) AppleWebKit/420.1 (KHTML, like Gecko) Version/3.0 Mobile/4A93 ",
32+
"Mozilla/5.0 (Windows; U; Windows NT 5.2) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 ",
33+
"Mozilla/5.0 (Linux; U; Android 3.2; ja-jp; F-01D Build/F0001) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13 ",
34+
"Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_1 like Mac OS X; ja-jp) AppleWebKit/532.9 (KHTML, like Gecko) Version/4.0.5 Mobile/8B117 Safari/6531.22.7",
35+
"Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_2_1 like Mac OS X; da-dk) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5 ",
36+
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US) AppleWebKit/530.9 (KHTML, like Gecko) Chrome/ Safari/530.9 ",
37+
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
38+
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
39+
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.10 Chromium/27.0.1453.93 Chrome/27.0.1453.93 Safari/537.36",
40+
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
41+
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36"
42+
]
43+
# Proxy-list page URLs mapped to the regular expression used to extract
# proxies from that page. Every pattern must capture the IP and the port as
# two separate groups, so that findall() yields pairs such as
# [("192.168.1.1", "80"), ("192.168.1.1", "90")].
# Add your own entries as needed.
# Only the first page of each site is scraped; to scrape further pages, add
# their URLs and patterns here.
#
# The patterns are raw strings: sequences like \d and \s are regex syntax, not
# Python string escapes, and non-raw literals containing them trigger
# invalid-escape warnings on current Python versions.
Url_Regular = {
    "http://www.kuaidaili.com/free/": r'IP">([\d\.]+)</td>\s*<td data-title="PORT">(\d+)</td>',
    "http://www.66ip.cn/nmtq.php?getnum=512&isp=0&anonymoustype=0&start=&ports=&export=&ipaddress=&area=0&proxytype=2&api=66ip": r"([\d\.]+):(\d+)",
    "http://www.xicidaili.com/nn/": r"<td>([\d\.]+)</td>\s*<td>(\d+)</td>",
    "http://www.ip3366.net/free/": r"<td>([\d\.]+)</td>\s*<td>(\d+)</td>",
    "http://www.proxy360.cn/Region/China": r'>\s*([\d\.]+)\s*</span>\s*.*width:50px;">\s*(\d+)\s*</span>',
    "http://www.mimiip.com/": r"<tr>\s+<td>([\d\.]+)</td>\s+<td>(\d+)</td>",
    "http://www.data5u.com/free/index.shtml": r'<li>([\d\.]+)</li></span>\s+<span style="width: 100px;"><li class=".*">(\d+)</li>',
    "http://www.ip181.com/": r"<tr.*>\s+<td>([\d\.]+)</td>\s+<td>([\d]+)</td>",
    "http://www.kxdaili.com/": r"<tr.*>\s+<td>([\d\.]+)</td>\s+<td>([\d]+)</td>",
}

GetIP.py

+131
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,131 @@
1+
import queue
2+
from random import choice
3+
from re import findall
4+
from threading import Thread
5+
from traceback import print_exc
6+
7+
from requests import get
8+
9+
import Config
10+
import ProxiesDataBase
11+
12+
# Queue shared with the verification threads: VertifyIp puts the "ip:port"
# string of every proxy that passed the check, and the callers drain it after
# joining the threads.
q = queue.Queue()

# Reuse the User-Agent pool defined in Config instead of maintaining a second,
# hand-copied list that has to be kept in sync. A copy is taken so that
# mutating one list never affects the other.
user_agents = list(Config.UserAgents)
51+
52+
53+
def GetPageContent(tar_url):
    """Download a page and return its body as text.

    Args:
        tar_url: URL of the page to fetch.

    Returns:
        The response text, or an empty string when the request fails; in that
        case the traceback is printed instead of being raised.
    """
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, compress',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,ru;q=0.4',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': "1",
        # Rotate the User-Agent on every request.
        'User-Agent': choice(user_agents)
    }
    try:
        # Without a timeout a stalled server would block the scrape forever.
        return get(tar_url, headers=headers, timeout=Config.TestTimeOut).text
    except Exception:
        # Exception (not BaseException) so Ctrl-C / SystemExit still propagate.
        print_exc()
        print('\n\n\n')
        return ""
71+
72+
73+
def GetIP():
    """Scrape the configured proxy pages, verify the proxies and store the good ones.

    Every URL in Config.Url_Regular is downloaded and its pattern applied to
    the page. Each candidate is checked by VertifyIp in its own thread, and
    every proxy that passes is written to the database.
    """
    # Proxies already stored are skipped so they are not inserted a second time.
    known = set(ProxiesDataBase.GetItems())

    # A set drops proxies that are listed on more than one site.
    candidates = set()
    for tar_url, regular in Config.Url_Regular.items():
        url_content = GetPageContent(tar_url)
        for item in findall(regular, url_content):
            ip, port = item[0], item[1]
            if "{}:{}".format(ip, port) not in known:
                candidates.add((ip, port))

    thread_list = [Thread(target=VertifyIp, args=pair) for pair in candidates]
    for worker in thread_list:
        worker.start()
    for worker in thread_list:
        worker.join()

    # All workers have finished, so the queue can be drained without blocking.
    while not q.empty():
        ProxiesDataBase.AddItem(q.get())
94+
95+
96+
def RefreshDB():
    """Re-verify every stored proxy and keep only the ones that still work.

    All stored "ip:port" entries are checked concurrently by VertifyIp. The
    table is then cleared and repopulated with the entries that passed, so
    dead proxies are removed and no entry is stored twice.
    """
    # De-duplicate so each proxy is verified (and later stored) only once.
    stored = set(ProxiesDataBase.GetItems())

    thread_list = []
    for item in stored:
        ip, _, port = item.partition(':')
        thread_list.append(Thread(target=VertifyIp, args=(ip, port)))
    for worker in thread_list:
        worker.start()
    for worker in thread_list:
        worker.join()

    # All workers have finished, so the queue can be drained without blocking.
    alive = []
    while not q.empty():
        alive.append(q.get())

    # Replace the old contents; re-adding without clearing would keep dead
    # proxies and duplicate the live ones on every refresh.
    ProxiesDataBase.ClearItems()
    for item in alive:
        ProxiesDataBase.AddItem(item)
110+
111+
112+
def VertifyIp(ip, port):
    """Check whether a proxy works and, if so, queue it for storage.

    Config.TestUrl is requested through the proxy. The proxy counts as usable
    when the response status is 200 and the body contains the expected marker
    text; its "ip:port" string is then put on the module-level queue ``q``.

    Args:
        ip: Proxy host address.
        port: Proxy port.
    """
    # The scraped proxies are plain HTTP proxies, so the proxy URL uses the
    # http:// scheme for both target schemes. An https:// proxy URL would make
    # requests open a TLS connection to the proxy itself.
    proxy_url = "http://{}:{}".format(ip, port)
    proxies = {"http": proxy_url, "https": proxy_url}
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, compress',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,ru;q=0.4',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'User-Agent': choice(user_agents)
    }
    try:
        response = get(Config.TestUrl,
                       proxies=proxies,
                       timeout=Config.TestTimeOut,
                       headers=headers)
        # The marker text presumably appears on the real test page, which
        # filters out proxies that answer 200 with some other page.
        if response.status_code == 200 and "新闻" in response.text:
            q.put("{}:{}".format(ip, port))
    except Exception:
        # A dead or slow proxy is the expected failure; it is simply not
        # recorded. Exception (not BaseException) lets Ctrl-C propagate.
        pass

ProxiesDataBase.py

+79
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,79 @@
1+
# table IPPORT
2+
# ip_port TEXT NOT NULL
3+
import random
4+
import sqlite3
5+
from threading import Thread
6+
import traceback
7+
import Config
8+
import GetIP
9+
10+
11+
def InitDB():
    """Create the proxy table if it does not exist yet.

    Returns:
        True when the table exists or was created, False when the statement
        failed (the traceback is printed).
    """
    db_conn = sqlite3.connect(Config.DBName)
    try:
        db_conn.execute(
            """CREATE TABLE IF NOT EXISTS {} (IP_PORT TEXT NOT NULL);""".format(Config.TabelName))
        db_conn.commit()
        return True
    except Exception:
        # Exception (not BaseException) so Ctrl-C / SystemExit still propagate.
        db_conn.rollback()
        traceback.print_exc()
        return False
    finally:
        db_conn.close()
24+
25+
26+
def AddItem(ip_port):
    """Insert one proxy into the table.

    Failures are not raised: the transaction is rolled back and the traceback
    is printed.

    Args:
        ip_port: Proxy as an "ip:port" string.
    """
    db_conn = sqlite3.connect(Config.DBName)
    try:
        # The value comes from scraped pages, so it is bound as a parameter
        # rather than formatted into the SQL text. Table names cannot be
        # bound; the name comes from the local Config module.
        db_conn.execute(
            """INSERT INTO {} VALUES (?);""".format(Config.TabelName), (ip_port,))
        db_conn.commit()
    except Exception:
        db_conn.rollback()
        traceback.print_exc()
    finally:
        # Always release the connection, even if rollback itself fails.
        db_conn.close()
37+
38+
39+
def DelItem(item):
    """Delete every row whose IP_PORT equals ``item``.

    Failures are not raised: the transaction is rolled back and the traceback
    is printed.

    Args:
        item: Proxy as an "ip:port" string.
    """
    db_conn = sqlite3.connect(Config.DBName)
    try:
        # Bind the value as a parameter instead of formatting it into the SQL.
        db_conn.execute(
            """DELETE FROM {} WHERE IP_PORT = ?;""".format(Config.TabelName), (item,))
        db_conn.commit()
    except Exception:
        db_conn.rollback()
        traceback.print_exc()
    finally:
        db_conn.close()
50+
51+
52+
def ClearItems():
    """Delete every row from the proxy table.

    Failures are not raised: the transaction is rolled back and the traceback
    is printed.
    """
    db_conn = sqlite3.connect(Config.DBName)
    try:
        db_conn.execute("""DELETE FROM {};""".format(Config.TabelName))
        db_conn.commit()
    except Exception:
        # Exception (not BaseException) so Ctrl-C / SystemExit still propagate.
        db_conn.rollback()
        traceback.print_exc()
    finally:
        db_conn.close()
62+
63+
64+
def GetItems():
    """Return all stored proxies.

    Returns:
        A list with the first column of every row in the table (the "ip:port"
        strings). The list is empty when the table is empty or the query
        fails; on failure the traceback is printed.
    """
    ip_list = []
    db_conn = sqlite3.connect(Config.DBName)
    try:
        rows = db_conn.execute(
            """SELECT * FROM {};""".format(Config.TabelName)).fetchall()
        ip_list = [row[0] for row in rows]
    except Exception:
        # Exception (not BaseException) so Ctrl-C / SystemExit still propagate.
        traceback.print_exc()
    finally:
        db_conn.close()
    return ip_list
77+
78+
79+

Util.py

+19
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,19 @@
1+
import random
2+
3+
import ProxiesDataBase
4+
import GetIP
5+
import re
6+
7+
def Refresh():
    """Run GetIP.RefreshDB() on the stored proxies, then GetIP.GetIP() to add new ones."""
    GetIP.RefreshDB()
    GetIP.GetIP()
10+
11+
def Get():
    """Pick a random stored proxy and return it as a ``proxies`` mapping.

    Returns:
        A dict with "http" and "https" keys pointing at the chosen proxy,
        suitable for the ``proxies=`` argument of requests. The dict is empty
        when no proxy is stored.
    """
    proxies_dict = {}
    result = ProxiesDataBase.GetItems()
    if result:
        # The stored proxies are plain HTTP proxies, so the proxy URL uses the
        # http:// scheme for both keys. An https:// proxy URL would make
        # requests open a TLS connection to the proxy itself.
        proxy_url = 'http://{}'.format(random.choice(result))
        proxies_dict['http'] = proxy_url
        proxies_dict['https'] = proxy_url
    return proxies_dict
19+

demo.py

+23
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,23 @@
1+
import sqlite3
2+
3+
import Config
4+
import ProxiesDataBase
5+
import Util
6+
7+
8+
9+
if __name__ == '__main__':
    # Create the database and the table if they do not exist yet.
    ProxiesDataBase.InitDB()
    # Refresh the database and add newly scraped proxies.
    Util.Refresh()
    # Fetch one proxy to use.
    proxies = Util.Get()
    print(proxies)

    # Report how many rows the table holds.
    conn = sqlite3.connect(Config.DBName)
    try:
        rows = conn.execute("""SELECT * FROM {};""".format(Config.TabelName)).fetchall()
        print(len(rows))
    finally:
        # Close the connection even if the query fails.
        conn.close()

0 commit comments

Comments
 (0)