Skip to content

Commit

Permalink
Merge pull request #74 from my-dev-app/feature/additional_proxy_lists
Browse files Browse the repository at this point in the history
Feature/additional proxy lists
  • Loading branch information
0x78f1935 authored Sep 3, 2024
2 parents 461469f + 0054599 commit ee98645
Show file tree
Hide file tree
Showing 14 changed files with 333 additions and 6 deletions.
3 changes: 2 additions & 1 deletion aproxyrelay/req.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ async def _test_proxy_link(self, proxy_url, data, session) -> None:
ClientOSError,
ServerTimeoutError,
InvalidURL,
ConnectionResetError,
):
self._filtered_failed = self._filtered_failed + 1

Expand Down Expand Up @@ -178,7 +179,7 @@ async def _obtain_targets(self, proxy_url, target, session) -> None:
},
) as response:
status = response.status
if status == 200:
if status in (200, 202,):
self.proxies.put(proxy_url)
data = await response.json()
if data:
Expand Down
25 changes: 25 additions & 0 deletions aproxyrelay/scrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@
from .parser_murongpig_proxy_master_http import ParserMurongpigProxyMasterHttp
from .parser_murongpig_proxy_master_socks4 import ParserMurongpigProxyMasterSocks4
from .parser_murongpig_proxy_master_socks5 import ParserMurongpigProxyMasterSocks5
from .parser_geonode import ParserGeonodeProxy
from .parser_proxyscraper_com import ParserScraperComProxy
from .parser_the_speed_http import ParserTheSpeedHTTPProxy
from .parser_the_speed_sock4 import ParserTheSpeedSock4Proxy
from .parser_the_speed_sock5 import ParserTheSpeedSock5Proxy


proxy_list = [
Expand Down Expand Up @@ -108,4 +113,24 @@
'url': 'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks5.txt',
'parser': ParserMurongpigProxyMasterSocks5,
},
{
'url': 'https://proxylist.geonode.com/api/proxy-list?country=US&limit=500&page=1&sort_by=lastChecked&sort_type=desc',
'parser': ParserGeonodeProxy
},
{
'url': 'https://api.proxyscrape.com/v3/free-proxy-list/get?request=displayproxies&country=us&proxy_format=protocolipport&format=text&timeout=20000', # noqa: B950
'parser': ParserScraperComProxy
},
{
'url': 'https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt',
'parser': ParserTheSpeedHTTPProxy
},
{
'url': 'https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks4.txt',
'parser': ParserTheSpeedSock4Proxy
},
{
'url': 'https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks5.txt',
'parser': ParserTheSpeedSock5Proxy
},
]
2 changes: 1 addition & 1 deletion aproxyrelay/scrapers/parser_freeproxylist.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ async def format_data(cls, zone: str, data: dict, queue: Queue) -> None:
"""Data formatter, formats data and returns is back in the process Queue"""
if data['Code'] == zone.upper():
queue.put({
'zone': data['Code'],
'zone': data['Code'].upper(),
'method': 'http' if data['Https'] != 'yes' else 'https',
'anonymity': 'anonymous' if data['Anonymity'].lower() in ['elite', 'anonymous', 'elite proxy'] else 'transparent',
'protocol': 'http' if data['Https'] != 'yes' else 'https',
Expand Down
48 changes: 48 additions & 0 deletions aproxyrelay/scrapers/parser_geonode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# -*- mode: python ; coding: utf-8 -*-
"""
░░ ░░ ░░ ░░░ ░░ ░░░░ ░ ░░░░ ░ ░░ ░ ░░░░░░░░ ░░ ░░░░ ░
▒ ▒▒▒▒ ▒ ▒▒▒▒ ▒ ▒▒▒▒ ▒ ▒▒▒▒ ▒▒ ▒▒ ▒▒▒ ▒▒ ▒▒ ▒▒▒▒ ▒ ▒▒▒▒▒▒▒ ▒▒▒▒▒▒▒ ▒▒▒▒ ▒▒ ▒▒ ▒▒
▓ ▓▓▓▓ ▓ ▓▓ ▓▓ ▓▓▓▓ ▓▓▓ ▓▓▓▓▓ ▓▓▓ ▓▓ ▓▓▓ ▓▓▓▓▓▓▓ ▓▓▓▓ ▓▓▓ ▓▓▓
█ █ ███████ ███ ██ ████ ██ ██ █████ ████ ███ ██ ███████ ███████ ████ ████
█ ████ █ ███████ ████ ██ ██ ████ ████ ████ ████ █ █ █ ████ ████ ████
By undeƒined
------------
Main parser example, other parsers can inherit from this class
"""
from queue import Queue

from .parser import MainScraper


class ParserGeonodeProxy(MainScraper):
    """Scraper for the geonode.com proxy-list JSON API."""

    def __init__(self) -> None:
        MainScraper.__init__(self)
        # Target country zone; populated by `format_url` before scraping.
        self.zone = None

    @classmethod
    async def format_url(cls, url, *args, **kwargs) -> str:
        """Formats URL before scraping, let us adjust query parameters for each parser.

        Keyword Args:
            country: Two-letter country code; defaults to "US".

        Returns:
            The URL with its `country=` query parameter replaced by the requested zone.
        """
        cls.zone = kwargs.get("country", "US")
        return url.replace('country=US', f'country={cls.zone.upper()}')

    @classmethod
    async def format_raw(cls, html: str) -> list:
        """No raw text/html parsing needed for this endpoint: it serves JSON,
        which is consumed by `format_data` instead."""
        return

    @classmethod
    async def format_data(cls, zone: str, data: dict, queue: Queue) -> None:
        """Data formatter, formats data and returns it back in the process Queue.

        Args:
            zone: Two-letter country code the proxies belong to.
            data: Decoded JSON response; proxy entries live under the `data` key.
            queue: Process queue receiving one formatted dict per proxy.
        """
        for item in data['data']:
            protocols = item.get('protocols')
            if not protocols:
                # Entries without any advertised protocol cannot be used by
                # the relay; skip them instead of raising an IndexError.
                continue
            queue.put(
                {
                    'zone': zone.upper(),
                    'method': protocols[0],
                    'anonymity': 'unknown',
                    'protocol': protocols[0],
                    'port': item['port'],
                    'ip': item['ip']
                }
            )
        return queue
2 changes: 1 addition & 1 deletion aproxyrelay/scrapers/parser_lumiproxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ async def format_data(cls, zone: str, data: dict, queue: Queue) -> None:
for item in data['data']['list']:
if item['country_code'] == zone.upper():
queue.put({
'zone': item['country_code'],
'zone': item['country_code'].upper(),
'method': cls._get_protocol(item['protocol']),
'anonymity': 'transparent' if item['anonymity'] not in [1, 2] else 'anonymous',
'protocol': cls._get_protocol(item['protocol']),
Expand Down
2 changes: 1 addition & 1 deletion aproxyrelay/scrapers/parser_proxyscrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ async def format_data(cls, zone: str, data: dict, queue: Queue) -> None:
for item in data['proxies']:
if item['ip_data']['countryCode'] == zone.upper():
queue.put({
'zone': item['ip_data']['countryCode'],
'zone': item['ip_data']['countryCode'].upper(),
'method': item['protocol'],
'anonymity': 'anonymous' if item['anonymity'] in ['elite', 'elite proxy', 'anonymous'] else 'transparent',
'protocol': item['protocol'],
Expand Down
53 changes: 53 additions & 0 deletions aproxyrelay/scrapers/parser_proxyscraper_com.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# -*- mode: python ; coding: utf-8 -*-
"""
░░ ░░ ░░ ░░░ ░░ ░░░░ ░ ░░░░ ░ ░░ ░ ░░░░░░░░ ░░ ░░░░ ░
▒ ▒▒▒▒ ▒ ▒▒▒▒ ▒ ▒▒▒▒ ▒ ▒▒▒▒ ▒▒ ▒▒ ▒▒▒ ▒▒ ▒▒ ▒▒▒▒ ▒ ▒▒▒▒▒▒▒ ▒▒▒▒▒▒▒ ▒▒▒▒ ▒▒ ▒▒ ▒▒
▓ ▓▓▓▓ ▓ ▓▓ ▓▓ ▓▓▓▓ ▓▓▓ ▓▓▓▓▓ ▓▓▓ ▓▓ ▓▓▓ ▓▓▓▓▓▓▓ ▓▓▓▓ ▓▓▓ ▓▓▓
█ █ ███████ ███ ██ ████ ██ ██ █████ ████ ███ ██ ███████ ███████ ████ ████
█ ████ █ ███████ ████ ██ ██ ████ ████ ████ ████ █ █ █ ████ ████ ████
By undeƒined
------------
Main parser example, other parsers can inherit from this class
"""
from queue import Queue

from .parser import MainScraper


class ParserScraperComProxy(MainScraper):
    """Scraper for the proxyscrape.com free-proxy-list API
    (plain text, one ``protocol://ip:port`` entry per line)."""

    def __init__(self) -> None:
        MainScraper.__init__(self)
        # Target country zone; populated by `format_url` before scraping.
        self.zone = None

    @classmethod
    async def format_url(cls, url, *args, **kwargs) -> str:
        """Formats URL before scraping, let us adjust query parameters for each parser.

        Keyword Args:
            country: Two-letter country code; defaults to "us".
        """
        cls.zone = kwargs.get("country", "us")
        return url

    @classmethod
    async def format_raw(cls, html: str) -> list:
        """Parse text/html pages, customized method for the parser of this website.

        Each useful line has the shape ``protocol://ip:port``; blank or
        malformed lines are skipped instead of raising.
        """
        result = []
        # splitlines() copes with both \r\n and \n payloads.
        for line in html.splitlines():
            line = line.strip()
            if not line or '://' not in line:
                # Skip blanks and rows without a protocol prefix.
                continue
            protocol, _, address = line.partition('://')
            if ':' not in address:
                # No port present; entry is unusable.
                continue
            ip, _, port = address.rpartition(':')
            result.append({
                'zone': cls.zone.upper(),
                'method': protocol,
                'anonymity': 'unknown',
                'protocol': protocol,
                'port': port,
                'ip': ip
            })
        return result

    @classmethod
    async def format_data(cls, zone: str, data: dict, queue: Queue) -> None:
        """Data formatter, formats data and returns it back in the process Queue."""
        queue.put(data)
        return queue
2 changes: 1 addition & 1 deletion aproxyrelay/scrapers/parser_socks_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ async def format_data(cls, zone: str, data: dict, queue: Queue) -> None:
"""Data formatter, formats data and returns is back in the process Queue"""
if data['Code'] == zone.upper():
queue.put({
'zone': data['Code'],
'zone': data['Code'].upper(),
'method': data['Version'].lower(),
'anonymity': 'anonymous' if data['Anonymity'].lower() in ['elite', 'anonymous', 'elite proxy'] else 'transparent',
'protocol': data['Version'].lower(),
Expand Down
2 changes: 1 addition & 1 deletion aproxyrelay/scrapers/parser_ssl_proxies.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ async def format_data(cls, zone: str, data: dict, queue: Queue) -> None:
"""Data formatter, formats data and returns is back in the process Queue"""
if data['Code'] == zone.upper():
queue.put({
'zone': data['Code'],
'zone': data['Code'].upper(),
'method': 'https',
'anonymity': 'anonymous' if data['Anonymity'].lower() in ['elite', 'anonymous', 'elite proxy'] else 'transparent',
'protocol': 'https',
Expand Down
53 changes: 53 additions & 0 deletions aproxyrelay/scrapers/parser_the_speed_http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# -*- mode: python ; coding: utf-8 -*-
"""
░░ ░░ ░░ ░░░ ░░ ░░░░ ░ ░░░░ ░ ░░ ░ ░░░░░░░░ ░░ ░░░░ ░
▒ ▒▒▒▒ ▒ ▒▒▒▒ ▒ ▒▒▒▒ ▒ ▒▒▒▒ ▒▒ ▒▒ ▒▒▒ ▒▒ ▒▒ ▒▒▒▒ ▒ ▒▒▒▒▒▒▒ ▒▒▒▒▒▒▒ ▒▒▒▒ ▒▒ ▒▒ ▒▒
▓ ▓▓▓▓ ▓ ▓▓ ▓▓ ▓▓▓▓ ▓▓▓ ▓▓▓▓▓ ▓▓▓ ▓▓ ▓▓▓ ▓▓▓▓▓▓▓ ▓▓▓▓ ▓▓▓ ▓▓▓
█ █ ███████ ███ ██ ████ ██ ██ █████ ████ ███ ██ ███████ ███████ ████ ████
█ ████ █ ███████ ████ ██ ██ ████ ████ ████ ████ █ █ █ ████ ████ ████
By undeƒined
------------
Main parser example, other parsers can inherit from this class
"""
from queue import Queue

from .parser import MainScraper


class ParserTheSpeedHTTPProxy(MainScraper):
    """Scraper for the TheSpeedX/SOCKS-List ``http.txt`` raw file
    (plain text, one ``ip:port`` entry per line)."""

    def __init__(self) -> None:
        MainScraper.__init__(self)
        # Target country zone; populated by `format_url` before scraping.
        self.zone = None

    @classmethod
    async def format_url(cls, url, *args, **kwargs) -> str:
        """Formats URL before scraping, let us adjust query parameters for each parser.

        Keyword Args:
            country: Two-letter country code; defaults to "us".
        """
        cls.zone = kwargs.get("country", "us")
        return url

    @classmethod
    async def format_raw(cls, html: str) -> list:
        """Parse text/html pages, customized method for the parser of this website.

        Each useful line is ``ip:port``; blank or malformed lines are skipped.
        """
        result = []
        # splitlines() + strip() keep CRLF payloads from leaving '\r' in the port.
        for line in html.splitlines():
            line = line.strip()
            if not line or ':' not in line:
                continue
            ip, _, port = line.rpartition(':')
            result.append({
                'zone': cls.zone.upper(),
                'method': 'http',
                'anonymity': 'unknown',
                'protocol': 'http',
                'port': port,
                'ip': ip
            })
        return result

    @classmethod
    async def format_data(cls, zone: str, data: dict, queue: Queue) -> None:
        """Data formatter, formats data and returns it back in the process Queue."""
        queue.put(data)
        return queue
53 changes: 53 additions & 0 deletions aproxyrelay/scrapers/parser_the_speed_sock4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# -*- mode: python ; coding: utf-8 -*-
"""
░░ ░░ ░░ ░░░ ░░ ░░░░ ░ ░░░░ ░ ░░ ░ ░░░░░░░░ ░░ ░░░░ ░
▒ ▒▒▒▒ ▒ ▒▒▒▒ ▒ ▒▒▒▒ ▒ ▒▒▒▒ ▒▒ ▒▒ ▒▒▒ ▒▒ ▒▒ ▒▒▒▒ ▒ ▒▒▒▒▒▒▒ ▒▒▒▒▒▒▒ ▒▒▒▒ ▒▒ ▒▒ ▒▒
▓ ▓▓▓▓ ▓ ▓▓ ▓▓ ▓▓▓▓ ▓▓▓ ▓▓▓▓▓ ▓▓▓ ▓▓ ▓▓▓ ▓▓▓▓▓▓▓ ▓▓▓▓ ▓▓▓ ▓▓▓
█ █ ███████ ███ ██ ████ ██ ██ █████ ████ ███ ██ ███████ ███████ ████ ████
█ ████ █ ███████ ████ ██ ██ ████ ████ ████ ████ █ █ █ ████ ████ ████
By undeƒined
------------
Main parser example, other parsers can inherit from this class
"""
from queue import Queue

from .parser import MainScraper


class ParserTheSpeedSock4Proxy(MainScraper):
    """Scraper for the TheSpeedX/SOCKS-List ``socks4.txt`` raw file
    (plain text, one ``ip:port`` entry per line)."""

    def __init__(self) -> None:
        MainScraper.__init__(self)
        # Target country zone; populated by `format_url` before scraping.
        self.zone = None

    @classmethod
    async def format_url(cls, url, *args, **kwargs) -> str:
        """Formats URL before scraping, let us adjust query parameters for each parser.

        Keyword Args:
            country: Two-letter country code; defaults to "us".
        """
        cls.zone = kwargs.get("country", "us")
        return url

    @classmethod
    async def format_raw(cls, html: str) -> list:
        """Parse text/html pages, customized method for the parser of this website.

        Each useful line is ``ip:port``; blank or malformed lines are skipped.
        """
        result = []
        # This list contains SOCKS4 proxies, so label them accordingly
        # (the previous 'http' label was a copy-paste slip from the HTTP parser).
        protocol = 'socks4'
        # splitlines() + strip() keep CRLF payloads from leaving '\r' in the port.
        for line in html.splitlines():
            line = line.strip()
            if not line or ':' not in line:
                continue
            ip, _, port = line.rpartition(':')
            result.append({
                'zone': cls.zone.upper(),
                'method': protocol,
                'anonymity': 'unknown',
                'protocol': protocol,
                'port': port,
                'ip': ip
            })
        return result

    @classmethod
    async def format_data(cls, zone: str, data: dict, queue: Queue) -> None:
        """Data formatter, formats data and returns it back in the process Queue."""
        queue.put(data)
        return queue
53 changes: 53 additions & 0 deletions aproxyrelay/scrapers/parser_the_speed_sock5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# -*- mode: python ; coding: utf-8 -*-
"""
░░ ░░ ░░ ░░░ ░░ ░░░░ ░ ░░░░ ░ ░░ ░ ░░░░░░░░ ░░ ░░░░ ░
▒ ▒▒▒▒ ▒ ▒▒▒▒ ▒ ▒▒▒▒ ▒ ▒▒▒▒ ▒▒ ▒▒ ▒▒▒ ▒▒ ▒▒ ▒▒▒▒ ▒ ▒▒▒▒▒▒▒ ▒▒▒▒▒▒▒ ▒▒▒▒ ▒▒ ▒▒ ▒▒
▓ ▓▓▓▓ ▓ ▓▓ ▓▓ ▓▓▓▓ ▓▓▓ ▓▓▓▓▓ ▓▓▓ ▓▓ ▓▓▓ ▓▓▓▓▓▓▓ ▓▓▓▓ ▓▓▓ ▓▓▓
█ █ ███████ ███ ██ ████ ██ ██ █████ ████ ███ ██ ███████ ███████ ████ ████
█ ████ █ ███████ ████ ██ ██ ████ ████ ████ ████ █ █ █ ████ ████ ████
By undeƒined
------------
Main parser example, other parsers can inherit from this class
"""
from queue import Queue

from .parser import MainScraper


class ParserTheSpeedSock5Proxy(MainScraper):
    """Scraper for the TheSpeedX/SOCKS-List ``socks5.txt`` raw file
    (plain text, one ``ip:port`` entry per line)."""

    def __init__(self) -> None:
        MainScraper.__init__(self)
        # Target country zone; populated by `format_url` before scraping.
        self.zone = None

    @classmethod
    async def format_url(cls, url, *args, **kwargs) -> str:
        """Formats URL before scraping, let us adjust query parameters for each parser.

        Keyword Args:
            country: Two-letter country code; defaults to "us".
        """
        cls.zone = kwargs.get("country", "us")
        return url

    @classmethod
    async def format_raw(cls, html: str) -> list:
        """Parse text/html pages, customized method for the parser of this website.

        Each useful line is ``ip:port``; blank or malformed lines are skipped.
        """
        result = []
        # This list contains SOCKS5 proxies, so label them accordingly
        # (the previous 'http' label was a copy-paste slip from the HTTP parser).
        protocol = 'socks5'
        # splitlines() + strip() keep CRLF payloads from leaving '\r' in the port.
        for line in html.splitlines():
            line = line.strip()
            if not line or ':' not in line:
                continue
            ip, _, port = line.rpartition(':')
            result.append({
                'zone': cls.zone.upper(),
                'method': protocol,
                'anonymity': 'unknown',
                'protocol': protocol,
                'port': port,
                'ip': ip
            })
        return result

    @classmethod
    async def format_data(cls, zone: str, data: dict, queue: Queue) -> None:
        """Data formatter, formats data and returns it back in the process Queue."""
        queue.put(data)
        return queue
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
'build',
'flake8-bugbear',
'pytest',
'pytest-asyncio',
'pytest-cov',
'pytest-sugar',
'pytest-xdist',
Expand Down
Loading

0 comments on commit ee98645

Please sign in to comment.