From 616b13b885182809fcedcd88c344252f3be4957f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?unde=C6=92ined?=
Date: Sat, 25 May 2024 18:40:05 +0200
Subject: [PATCH 1/3] CHANGE: Default VSC debugger

---
 .vscode/launch.json | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index ac3cf37..6da0e4e 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -6,15 +6,15 @@
     "configurations": [
         {
             "name": "Start: Example",
-            "type": "python",
+            "type": "debugpy",
             "request": "launch",
-            "program": "${workspaceFolder}/example.py",
+            "program": "${workspaceFolder}/ipython_config.py",
            "console": "integratedTerminal",
             "justMyCode": true
         },
         {
             "name": "Test: Pytest",
-            "type": "python",
+            "type": "debugpy",
             "request": "launch",
             "module": "pytest",
             "justMyCode": true
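
VS Code's Python debugger now ships as the separate ms-python.debugpy extension, and
"debugpy" supersedes the deprecated "python" value for "type"; the two launch entries
above are otherwise unchanged. For completeness, the same debug adapter can also be
driven from code when an attach-style configuration is preferred — a minimal sketch
(the port number and filename are illustrative, not part of this patch series):

    # attach_sketch.py - illustrative companion to a {"type": "debugpy", "request": "attach"} config
    import debugpy

    debugpy.listen(5678)       # expose the debug adapter on localhost:5678
    debugpy.wait_for_client()  # block until VS Code attaches
    debugpy.breakpoint()       # pause here once a client is connected
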
""" - self.logger.info('Initializing parsers ...') + self.logger.info('[aProxyRelay] Initializing parsers ...') ggs = [] scrapes = [] for item in proxy_list: - self.logger.info(f'Loading: {item["parser"].__name__}') + self.logger.info(f'[aProxyRelay] Loading: {item["parser"].__name__}') parser = item['parser'] for zone in self.zones: url = await parser.format_url(url=item['url'], zone=zone) @@ -68,34 +68,34 @@ async def get_proxies(self) -> None: scrapes.append(url) ggs = list(set(ggs)) scrapes = list(set(scrapes)) - self.logger.info(f'Parsers loaded: GG: {len(ggs)}, Other: {len(scrapes)}, Total: {len(ggs + scrapes)} ...') + self.logger.info(f'[aProxyRelay] Parsers loaded: GG: {len(ggs)}, Other: {len(scrapes)}, Total: {len(ggs + scrapes)} ...') if self.scrape: async with ClientSession(conn_timeout=self.timeout) as session: await self._fetch_proxy_page(scrapes, session) - self.logger.info(f'Scraper: Found {self._queue_filter.qsize()} competent proxy servers') + self.logger.info(f'[aProxyRelay] Scraper: Found {self._queue_filter.qsize()} competent proxy servers') else: - self.logger.info('Scraper: Skip discovery of new proxy servers ...') + self.logger.info('[aProxyRelay] Scraper: Skip discovery of new proxy servers ...') if self.filter and self.scrape: self.logger.info( - f'Validating: Proxies ({self._queue_filter.qsize()}), checking if proxies meet connection requirements ...' + f'[aProxyRelay] Validating: Proxies ({self._queue_filter.qsize()}), checking if proxies meet connection requirements ...' ) async with ClientSession(conn_timeout=15) as session: await self._test_all_proxies(session) - self.logger.info(f'Filter: Found {self._filtered_failed} incompetent and {self._filtered_available} available proxy servers in {datetime.now(UTC) - self.started}') # noqa: B950 + self.logger.info(f'[aProxyRelay] Filter: Found {self._filtered_failed} incompetent and {self._filtered_available} available proxy servers in {datetime.now(UTC) - self.started}') # noqa: B950 else: while not self._queue_filter.empty(): _target = self._queue_filter.get() _target['proxy'] = f"{_target['protocol'].replace('https', 'http')}://{_target['ip']}:{_target['port']}" self.proxies.put(_target) - self.logger.info('Filter: Skip tests for scraped proxy servers ...') + self.logger.info('[aProxyRelay] Filter: Skip tests for scraped proxy servers ...') async with ClientSession(conn_timeout=self.timeout) as session: await self._fetch_proxy_servers(ggs, session) - self.logger.info(f'Scraper: Found {self._filtered_ggs} additional available proxy servers') - self.logger.info(f'Found {self.proxies.qsize()} working proxies, took {datetime.now(UTC) - self.started}, Please wait...') + self.logger.info(f'[aProxyRelay] Scraper: Found {self._filtered_ggs} additional available proxy servers') + self.logger.info(f'[aProxyRelay] Found {self.proxies.qsize()} working proxies, took {datetime.now(UTC) - self.started}, Please wait...') async def process_targets(self) -> None: """ diff --git a/aproxyrelay/process.py b/aproxyrelay/process.py index cd1ed0e..971cde9 100644 --- a/aproxyrelay/process.py +++ b/aproxyrelay/process.py @@ -30,7 +30,7 @@ async def _process_targets_main(self) -> None: When they fail, we delete them from memory. Once the proxy queue is empty, we look for new proxies before we continue with our targets. 
""" - self.logger.info('Processing ...') + self.logger.info('[aProxyRelay] Processing ...') async with ClientSession( connector=ProxyConnector(remote_resolve=True), @@ -53,7 +53,7 @@ async def _process_targets_main(self) -> None: # Use asyncio.gather to concurrently execute all tasks await gather(*tasks) - self.logger.info(f'Processing ({self._queue_target_process.qsize()}) items in Queue ... Please wait...') + self.logger.info(f'[aProxyRelay] Processing ({self._queue_target_process.qsize()}) items in Queue ... Please wait...') if self.proxies.empty() and self._queue_target_process.qsize() > 0: await self.get_proxies() diff --git a/aproxyrelay/req.py b/aproxyrelay/req.py index a619c20..b4f4d4d 100644 --- a/aproxyrelay/req.py +++ b/aproxyrelay/req.py @@ -29,7 +29,7 @@ def __init__(self) -> None: """ Initialize an instance of AProxyRelayRequests. """ - self.logger.debug("AProxyRelay Request module initialized!") + self.logger.info("[aProxyRelay] Request module initialized!") async def _fetch_proxy_page(self, urls, session): """ @@ -57,7 +57,7 @@ async def _request_proxy_page(self, url, session) -> None: return async with session.get(url, headers=self._get_header()) as response: - self.logger.info(f"Scraper: {url}, Status Code: {response.status}") + self.logger.info(f"[aProxyRelay] Scraper: {url}, Status Code: {response.status}") if response.status == 200: new_queue = await parser.scrape(parser.zone, response) while not new_queue.empty(): @@ -151,7 +151,7 @@ async def _request_proxy_servers(self, url, session) -> None: zone = url.split('zone=')[1].split('&')[0] async with session.get(url, headers=self._get_header()) as response: - self.logger.info(f"Scraper: {url}, Status Code: {response.status}") + self.logger.info(f"[aProxyRelay] Scraper: {url}, Status Code: {response.status}") if response.status == 200: new_queue = await parser.scrape(zone, response) while not new_queue.empty(): @@ -181,10 +181,13 @@ async def _obtain_targets(self, proxy_url, target, session) -> None: if status == 200: self.proxies.put(proxy_url) data = await response.json() - if pack := self.unpack(data, target): - self._queue_result.put(pack) + if data: + if pack := self.unpack(data, target): + self._queue_result.put(pack) + else: + self.logger.warning(f'[aProxyRelay] Could not unpack data for: {target}') else: - self.logger.warning(f'Could not unpack data for: {target}') + self.logger.warning(f'[aProxyRelay] Target {target} Data seems to be None: {data}') else: self._queue_target_process.put(target) From 604621fff358b93bf916ac0a83b3c22141b8ccf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?unde=C6=92ined?= Date: Sat, 25 May 2024 18:42:09 +0200 Subject: [PATCH 3/3] FIX: Flake8 violations --- aproxyrelay/core.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/aproxyrelay/core.py b/aproxyrelay/core.py index 62c21ae..efbb8b4 100644 --- a/aproxyrelay/core.py +++ b/aproxyrelay/core.py @@ -78,9 +78,7 @@ async def get_proxies(self) -> None: self.logger.info('[aProxyRelay] Scraper: Skip discovery of new proxy servers ...') if self.filter and self.scrape: - self.logger.info( - f'[aProxyRelay] Validating: Proxies ({self._queue_filter.qsize()}), checking if proxies meet connection requirements ...' 
- ) + self.logger.info(f'[aProxyRelay] Validating: Proxies ({self._queue_filter.qsize()}), checking if proxies meet connection requirements ...') # noqa: B950 async with ClientSession(conn_timeout=15) as session: await self._test_all_proxies(session) self.logger.info(f'[aProxyRelay] Filter: Found {self._filtered_failed} incompetent and {self._filtered_available} available proxy servers in {datetime.now(UTC) - self.started}') # noqa: B950 @@ -95,7 +93,7 @@ async def get_proxies(self) -> None: await self._fetch_proxy_servers(ggs, session) self.logger.info(f'[aProxyRelay] Scraper: Found {self._filtered_ggs} additional available proxy servers') - self.logger.info(f'[aProxyRelay] Found {self.proxies.qsize()} working proxies, took {datetime.now(UTC) - self.started}, Please wait...') + self.logger.info(f'[aProxyRelay] Found {self.proxies.qsize()} working proxies, took {datetime.now(UTC) - self.started}, Please wait...') # noqa: B950 async def process_targets(self) -> None: """
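
End to end, the entry point whose log lines these patches touch is start(), which drives
the asyncio loop and returns a Queue of scraped results. A usage sketch for exercising the
new output follows; the AProxyRelay class name and its constructor arguments are
assumptions inferred from the attributes the diffs reference (self.timeout, self.scrape,
self.filter, self.zones, self.debug), not a confirmed signature:

    # run_sketch.py - hypothetical driver, adjust to the real constructor
    import logging
    from aproxyrelay import AProxyRelay  # assumed export

    logging.basicConfig(level=logging.INFO)  # surfaces the '[aProxyRelay] ...' messages

    relay = AProxyRelay(
        targets=['https://example.com/api'],  # hypothetical target list
        timeout=5,
        scrape=True,
        filter=True,
        zones=['us'],
        debug=False,
    )

    results = relay.start()  # blocks until scraping completes, returns a Queue
    while not results.empty():
        print(results.get())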