|
| 1 | +import queue |
| 2 | +from random import choice |
| 3 | +from re import findall |
| 4 | +from threading import Thread |
| 5 | +from traceback import print_exc |
| 6 | + |
| 7 | +from requests import get |
| 8 | + |
| 9 | +import Config |
| 10 | +import ProxiesDataBase |
| 11 | + |
# Shared thread-safe hand-off between the verification workers and the
# database writers: VertifyIp puts every working "ip:port" string here, and
# GetIP / RefreshDB drain it once all of their worker threads have joined.
q = queue.Queue()
| 13 | + |
# Pool of browser User-Agent header values. GetPageContent and VertifyIp pick
# one at random (random.choice) for every outgoing request.
user_agents = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
    "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
    "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
    "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
    "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
    "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
    "Mozilla/5.0 (Windows; U; Windows NT 5.2) Gecko/2008070208 Firefox/3.0.1",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1) Gecko/20070309 Firefox/2.0.0.3",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1) Gecko/20070803 Firefox/1.5.0.12",
    "Opera/9.27 (Windows NT 5.2; U; zh-cn)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.2) AppleWebKit/525.13 (KHTML, like Gecko) Version/3.1 Safari/525.13",
    "Mozilla/5.0 (iPhone; U; CPU like Mac OS X) AppleWebKit/420.1 (KHTML, like Gecko) Version/3.0 Mobile/4A93 ",
    "Mozilla/5.0 (Windows; U; Windows NT 5.2) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 ",
    "Mozilla/5.0 (Linux; U; Android 3.2; ja-jp; F-01D Build/F0001) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13 ",
    "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_1 like Mac OS X; ja-jp) AppleWebKit/532.9 (KHTML, like Gecko) Version/4.0.5 Mobile/8B117 Safari/6531.22.7",
    "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_2_1 like Mac OS X; da-dk) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5 ",
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US) AppleWebKit/530.9 (KHTML, like Gecko) Chrome/ Safari/530.9 ",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.10 Chromium/27.0.1453.93 Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36"
]
| 51 | + |
| 52 | + |
def GetPageContent(tar_url, timeout=30):
    """Download ``tar_url`` and return the response body as text.

    The request carries browser-like headers and a User-Agent picked at
    random from ``user_agents``.

    Args:
        tar_url: URL of the page to fetch.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.get``. Without a timeout an unresponsive site would
            block the caller indefinitely.

    Returns:
        The decoded response body, or ``""`` when the request fails. On
        failure the traceback is printed and the error is not propagated.
        KeyboardInterrupt and SystemExit are deliberately not caught, so the
        crawl can still be stopped.
    """
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, compress',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,ru;q=0.4',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': "1",
        'User-Agent': choice(user_agents),
    }
    try:
        return get(tar_url, headers=headers, timeout=timeout).text
    except Exception:
        # Best effort: one broken source page must not abort the whole crawl,
        # so report the failure and hand back an empty document.
        print_exc()
        print('\n\n\n')
        return ""
| 71 | + |
| 72 | + |
def _verify_and_store(endpoints):
    """Verify (ip, port) pairs concurrently and persist the working ones.

    One ``VertifyIp`` thread is started per distinct pair. After every thread
    has finished, each proxy string that the workers placed on the shared
    queue ``q`` is passed to ``ProxiesDataBase.AddItem``.

    Args:
        endpoints: Iterable of ``(ip, port)`` tuples of strings.
    """
    # dict.fromkeys drops duplicates while keeping first-seen order, so a
    # proxy listed by several sources is tested and stored only once.
    workers = [
        Thread(target=VertifyIp, args=endpoint)
        for endpoint in dict.fromkeys(endpoints)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    # get_nowait() instead of empty()+get(): a blocking get() could hang
    # forever if another consumer emptied the queue in between.
    while True:
        try:
            proxy = q.get_nowait()
        except queue.Empty:
            break
        ProxiesDataBase.AddItem(proxy)


def GetIP():
    """Scrape proxy candidates from every configured source and store the working ones.

    ``Config.Url_Regular`` maps each source URL to the regular expression
    applied to that page; the first two groups of every match are used as
    the proxy's ip and port. All candidates are then verified and the ones
    that pass are written to the database.
    """
    endpoints = []
    for tar_url, regular in Config.Url_Regular.items():
        url_content = GetPageContent(tar_url)
        endpoints.extend(
            (match[0], match[1]) for match in findall(regular, url_content)
        )
    _verify_and_store(endpoints)


def RefreshDB():
    """Re-verify the proxies returned by ``ProxiesDataBase.GetItems``.

    Every stored ``"ip:port"`` entry is tested again and the ones that still
    work are passed to ``ProxiesDataBase.AddItem``.

    NOTE(review): entries that fail verification are not removed here;
    confirm that ProxiesDataBase takes care of expiring stale proxies.
    """
    endpoints = []
    for item in ProxiesDataBase.GetItems():
        parts = item.split(':')
        if len(parts) < 2:
            # Malformed entry without a port: skip it rather than letting an
            # IndexError abort the refresh of every other proxy.
            continue
        endpoints.append((parts[0], parts[1]))
    _verify_and_store(endpoints)
| 110 | + |
| 111 | + |
def VertifyIp(ip, port):
    """Test one proxy and enqueue it on ``q`` if it works.

    ``Config.TestUrl`` is requested through the proxy with a timeout of
    ``Config.TestTimeOut``. The proxy counts as working when the response
    status is 200 and the body contains the marker text "新闻"
    (presumably text that the real test page is known to contain; confirm
    against Config.TestUrl). Working proxies are put on the shared queue as
    ``"ip:port"``.

    Args:
        ip: Proxy host.
        port: Proxy port.

    Returns:
        None. Any request failure simply means the proxy is not enqueued.
    """
    # The scheme in the proxy URL describes how to talk to the proxy itself.
    # An "https://" proxy URL makes requests open a TLS connection to the
    # proxy, which plain HTTP proxies cannot answer, so both target schemes
    # are routed through the "http://" proxy URL.
    proxy_url = "http://{}:{}".format(ip, port)
    proxies = {"http": proxy_url, "https": proxy_url}
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, compress',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,ru;q=0.4',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'User-Agent': choice(user_agents),
    }
    try:
        response = get(Config.TestUrl,
                       proxies=proxies,
                       timeout=Config.TestTimeOut,
                       headers=headers)
        is_working = response.status_code == 200 and "新闻" in response.text
    except Exception:
        # Dead or misbehaving proxies are the expected common case; a failure
        # just means this proxy is not recorded.
        return

    if is_working:
        q.put("{}:{}".format(ip, port))
| 131 | + pass |
0 commit comments