Skip to content

Commit f25e4d6

Browse files
authored
Merge pull request #14 from b08of1sh/b08of1sh-patch-1
Optimize invalid page filtering by calculating hash
2 parents d27ab65 + 7b5b8a8 commit f25e4d6

File tree

1 file changed

+31
-4
lines changed

1 file changed

+31
-4
lines changed

scanners/path_detector.py

+31-4
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import ssl
2121
from urllib3.exceptions import InsecureRequestWarning
2222
import warnings
23+
import hashlib
2324

2425
# 禁用urllib3中的不安全请求警告
2526
warnings.simplefilter('ignore', InsecureRequestWarning)
@@ -49,18 +50,24 @@ class PathDetector:
4950
SSE_MAX_SIZE = 5120 # 5KB
5051
MAX_RESPONSE_LENGTH = 102400 # 100KB
5152
PATH_THREAD_COUNT = 3 # 使用独立的3个线程池进行路径探测
53+
HASH_THRESHOLD = 5 # 哈希值重复次数阈值
5254

5355
def __init__(self, paths, proxy_manager):
# NOTE(review): this text is a rendered diff — the bare numeric lines
# ("5456", "59+", ...) are diff line-number artifacts, not code, and are
# preserved here untouched.
"""Initialize the detector.

paths: mapping of URL path -> expected response signature used by
    ``detect`` (e.g. {"actuator": "_links"}).
proxy_manager: object exposing ``get_proxy()``; its result is stored
    for outbound requests.
"""
5456
self.paths = paths
5557
self.proxy = proxy_manager.get_proxy()
5658
self.thread_local = threading.local()  # Thread-local storage (one per worker thread)
59+
self.hash_counter = {}  # Counter of response-content MD5 hashes -> occurrence count
60+
self.lock = threading.Lock()  # Lock guarding hash_counter updates across threads
5761

5862
def detect(self, url):
5963
"""检测指定URL的敏感路径"""
6064
path_failed_count = 0
6165
path_success_count = 0
6266
detected_paths = []
6367

68+
# 重置哈希值计数器
69+
self.hash_counter = {}
70+
6471
# 使用独立的线程池进行路径探测,并指定最大线程数为3
6572
with ThreadPoolExecutor(max_workers=self.PATH_THREAD_COUNT) as executor:
6673
futures = {executor.submit(self._detect_path, url, path, signature): path for path, signature in self.paths.items()}
@@ -108,15 +115,35 @@ def _make_request(self, url):
108115
content += chunk
109116
if len(content) > self.SSE_MAX_SIZE:
110117
break
111-
return content.decode("utf-8", errors="ignore")
118+
response_content = content.decode("utf-8", errors="ignore")
112119
elif res.status_code == 200:
113120
# ANSI 控制字符实现闪动效果
114121
blinking_effect = "\033[5m"
115122
# 修改logger.info调用,输出红色闪动的成功消息
116123
logger.info(f"{blinking_effect}{Fore.RED} [{res.status_code}] [Content-Length: {res.headers.get('Content-Length', 0)}] {Fore.CYAN}<-- [Success] {Fore.RESET}", extra={"target": url})
117124
# 返回前 MAX_RESPONSE_LENGTH 的内容
118-
return res.text[:self.MAX_RESPONSE_LENGTH]
119-
logger.info(f"[{res.status_code}] [Content-Length: {res.headers.get('Content-Length', 0)}]", extra={"target": url})
125+
response_content = res.text[:self.MAX_RESPONSE_LENGTH]
126+
else:
127+
logger.info(f"[{res.status_code}] [Content-Length: {res.headers.get('Content-Length', 0)}]", extra={"target": url})
128+
return None
129+
130+
# 计算响应内容的哈希值
131+
response_hash = hashlib.md5(response_content.encode()).hexdigest()
132+
133+
# 更新哈希值计数器
134+
with self.lock:
135+
if response_hash in self.hash_counter:
136+
self.hash_counter[response_hash] += 1
137+
else:
138+
self.hash_counter[response_hash] = 1
139+
140+
# 如果哈希值重复次数达到阈值,丢弃该路径
141+
if self.hash_counter[response_hash] >= self.HASH_THRESHOLD:
142+
logger.info(f"Hash {response_hash} repeated {self.HASH_THRESHOLD} times, discarding path: {url}")
143+
return None
144+
145+
return response_content
146+
120147
except requests.exceptions.SSLError as ssl_error:
121148
logger.error(f"SSL error occurred for {url}: {ssl_error}", extra={"target": url})
122149
return self._retry_with_different_ssl_version(session, url) # 使用不同的 SSL/TLS 版本重新连接
@@ -188,4 +215,4 @@ def close_sessions(detector_instance):
188215
paths = {"actuator": "_links", "actuator/beans": "beans"}
189216
path_d = PathDetector(paths, proxy_manager)
190217
print(path_d.detect("http://192.168.1.13:8080/"))
191-
print(path_d.detect("http://192.168.1.13:8083/"))
218+
print(path_d.detect("http://192.168.1.13:8083/"))

0 commit comments

Comments
 (0)