Commit b739517
committed Sep 11, 2017

modify function

1 parent 0c5280e commit b739517
7 files changed: +164 -111 lines
LittleFileScan.py
+132 -91
@@ -17,13 +17,15 @@
 import Queue
 import threading
 import argparse
+import multiprocessing
+from libs.common import *
 from libs.GenerateDict import ProcessDic

 class myFileScan:
     def __init__(self, url, custom_extion="php",full_scan=False, verbose=True):
         # the url only be www.iqiyi.com or 127.0.0.1:8080
-        self.url = url
-        self.compeleUrl()
+        self.url = compeleUrl(url)
+
         # self.url_queue = Queue.Queue()
         self.file_queue = Queue.Queue()
         self.lock = threading.Lock()
@@ -45,7 +47,8 @@ def __init__(self, url, custom_extion="php",full_scan=False, verbose=True):
         self.error_content_length = 0
         self.dir_in_content = False
         self.error_location = ""
-        self.check_404()
+
+


         # available dirs
@@ -56,6 +59,11 @@ def __init__(self, url, custom_extion="php",full_scan=False, verbose=True):

         self.generateFileAndDir()

+        # check 404
+        self.checkUrl()
+        self.check_404(types="file")
+        self.checkFile()
+



@@ -87,34 +95,37 @@ def generateFileAndDir(self):
         # default generate 188 dirs and 868 files(bak & tmp)
         # so next, we fuzz dirs, and add possible dirs, then we generate dirs and files

-    def check_404(self):
+    def check_404(self, types="dir"):
         error_dir = "this_is_error_dirs_test/"
         error_files = "this_is_error_files_test/hello.html"


         # Stitching url
-        _ = self.url + "/" + error_files
+        if types == "dir":
+            _ = self.url + "/" + error_dir
+        elif types == "file":
+            _ = self.url + "/" + error_files
+        else:
+            raise Exception("[-] [Error] [check_404] types not Identify")
         try:
-            resp = requests.get(_, headers=headers, timeout=timeout, allow_redirects=False, verify=allow_ssl_verify)
-            _content = resp.content.decode("utf-8", "ignore")
-            _content = _content.replace("\r\n", "").replace(" ", "")
-            print _content
-            print resp
+            resp = requests.get(_, headers=headers, timeout=5, allow_redirects=True, verify=allow_ssl_verify)
+            _content = decode_response_text(resp.content)
+            #_content = _content.replace("\r\n", "").replace(" ", "")

             self.error_status_code = resp.status_code

             # if 302 or 301, get Location
-            if resp.status_code in [301, 302] and "Location" in resp.headers:
-                self.error_location = resp.headers["Location"]
-
-            if self.error_pattern.match(_content):
+            if resp.url != _:
+                self.error_location = resp.url
+            else:
+                self.error_location = ""
+            if self.error_pattern.findall(_content):
                 self.error_flag = True
-
-                if error_dir in _content:
-                    self.dir_in_content = True
-                self.error_content_length = len(_content) - len(_content)
             else:
-                self.error_content_length = len(_content)
+                self.error_flag = False
+            # if resp.status_code in [301, 302] and "Location" in resp.headers:
+            #     self.error_location = resp.headers["Location"]
+            self.error_content_length = len(_content)
         except Exception as e:
             self.has_404 = False
             print "[-] [Error] [myFileScan] [check_404] Request Error " + str(e)
@@ -151,54 +162,50 @@ def verifyAlive(self, dirs, types="dir", compress=False):
                     return False

             else:
-                resp = requests.get(_url, headers=headers, timeout=timeout, allow_redirects=False, verify=allow_ssl_verify)
-                _content = resp.content.decode("utf-8", "ignore")
-                _content = _content.replace("\r\n", "").replace(" ", "")
+                resp = requests.get(_url, headers=headers, timeout=timeout, allow_redirects=True, verify=allow_ssl_verify)
+                _content = decode_response_text(resp.content)
                 # return False directly when the status is 400, 404, 501, 502, 503, 505
                 if self.verbose:
                     print "[+] [Info] [myFileScan] [verifyAlive] verify: {:25} status_code: {}".format(_url, resp.status_code)
-                if resp.status_code in [400, 403, 404, 405, 500, 501, 502, 503, 505]:
+                if resp.status_code in [400, 403, 404, 414, 405, 500, 501, 502, 503, 505]:
                     return False

                 if resp.status_code in [301, 302]:
                     if "Location" in resp.headers:
                         if resp.headers["Location"] == self.error_location:
                             return False
+
+                # match the error marker directly
+                if self.error_flag:
+                    if self.error_pattern.findall(_content):
+                        return False
+                    else:
+                        return True
+
+
                 # if the 404 probe in check_404 got a response
                 if self.has_404:
                     # even when the status code is not 404, compare it with error_status_code
                     if resp.status_code == self.error_status_code:
-                        # check whether the 404 marker is present
-                        if self.error_flag:
-                            if self.error_pattern.match(_content):
-                                return False
-                            else:
-                                return True
-                        # no 404 marker, so compare content lengths instead
+                        mins = min(self.error_content_length, len(_content))
+                        if mins == 0:
+                            mins = 10.0
+                        if abs(float(self.error_content_length, len(_content))) / mins > 0.3:
+                            return True
                         else:
-                            if self.dir_in_content:
-                                l_content = len(_content) - len(dirs)
-                                if abs(l_content - self.error_content_length) < 10:
-                                    return False
-                                else:
-                                    return True
-                            else:
-                                if abs(len(_content) - self.error_content_length) < 10:
-                                    return False
-                                else:
-                                    return True
+                            return False

                     # nothing above matched and the status differs from error_code, so tentatively consider it True
                     else:
-                        return True
+                        return False
                 else:
                     # what if the check_404 request itself failed? return True for now
-                    return True
+                    return False

         except Exception as e:
             # on error, treat it as True, i.e. a 404
             print "[-] [Error] [myFileScan] [verifyAlive] " + str(e)
-            return True
+            return False

         # the idea: first decide whether the page is a 404; if it is, set the 404 flag to true
         #
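
When the status code matches the baseline, the new branch above falls back to comparing body lengths. As written, float(self.error_content_length, len(_content)) passes two arguments to float(), which raises a TypeError at runtime; the surrounding threshold logic suggests a relative length difference is intended. A hedged sketch of that comparison with made-up lengths (the subtraction inside abs() is an assumption, not the repository's code):

# Sketch: relative difference between the baseline 404 body length and the
# candidate body length. The 0.3 threshold mirrors the hunk above; the
# subtraction is assumed, since float(a, b) is not valid Python.
def looks_alive(error_len, content_len, threshold=0.3):
    mins = min(error_len, content_len)
    if mins == 0:
        mins = 10.0
    return abs(error_len - content_len) / float(mins) > threshold

print(looks_alive(5120, 480))    # lengths differ a lot  -> True (probably a real page)
print(looks_alive(5120, 5100))   # close to the baseline -> False (looks like the 404 page)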
@@ -207,7 +214,7 @@ def verifyAlive(self, dirs, types="dir", compress=False):
         # if nothing matched, look at error_length; a difference within a certain range means the page does not exist


-
+    """
     def compeleUrl(self):
         # judge if self.url contains ":"
         scheme = "http"
@@ -228,7 +235,7 @@ def compeleUrl(self):
             self.url = scheme + "://" + self.url
         # last, remove last "/"
         self.url = self.url.rstrip("/")
-
+    """


    # keep everything in one block for now, split it up later
@@ -237,7 +244,8 @@ def checkUrl(self):
             if self.verifyAlive(webdir):
                 self.available_dirs.append(webdir)
                 #self.available_dirs.append("/")
-
+        if not self.available_dirs:
+            self.available_dirs.append("/")
         print self.available_dirs
         print "that's all available_dirs"
         #time.sleep(10)
@@ -274,8 +282,10 @@ def checkFile(self):


     def runFuzz(self):
+
         while (not self.file_queue.empty()) and self.STOP_ME == False:
             _ = self.file_queue.get()
+            # print "[runFuzz] Url:\t" + str(_[0])
             result = self.verifyAlive(_[0], types="file", compress=_[1])
             if result:
                 self.lock.acquire()
@@ -286,34 +296,74 @@ def runFuzz(self):

 def parseArgs():
     parser = argparse.ArgumentParser()
-    parser.add_argument("--host", help="the target host, MUST HAVE")
+    parser.add_argument("--host", help="the target host")
     parser.add_argument("--ext", help="the extend name, default php", default="php")
     parser.add_argument("-v", help="show more detail when running", action="store_true")
+    parser.add_argument("-f", help="file contains ip:port or subDomais each line")
     parser.add_argument("--full", help="Use All Dict (May be more False positives and take more time)", action="store_true")
     parser.add_argument("-t", "--threadnum", help="the number of thread count, default 15", default=15)
     args = parser.parse_args()
-    if args.host is None:
+    if args.host is None and args.f is None:
+        print "[--host/-f ] Must Contains One"
         parser.print_usage()
         exit(0)
     else:
         return args


+def check_url_alive(url):
+    url = compeleUrl(url)
+    print "[check_url_alive]:\t" + url
+    try:
+        resp = requests.get(url, headers={"User-Agent": "check_url_alive", "Connection": "Close"}, timeout=5, allow_redirects=False, verify=False)
+        return True
+    except:
+        return False


-
-
-def ScanApi(host, custom_extion="php", verbose=True, full_scan=False):
-    a = myFileScan(args.host, custom_extion=custom_extion, verbose=verbose, full_scan=full_scan)
-    a.checkUrl()
-    a.checkFile()
+def compeleUrl(url):
+    # judge if self.url contains ":"
+    scheme = "http"
+    if ":" in url:
+        # judge if 443
+        if url.split(":")[-1] == "443":
+            scheme = "https"
+        else:
+            scheme = "http"
+    # judge if start with "://"
+    if url.startswith("://"):
+        url = url[3:]
+    elif url.startswith("//"):
+        url = url[2:]
+
+    # now judge if start with http
+    if not url.startswith("http"):
+        url = scheme + "://" + url
+    # last, remove last "/"
+    url = url.rstrip("/")
+    return url
+
+
+def ScanApi(host, args):
+    print str(host) + str(args)
+    custom_extion = args.ext
+    verbose = args.v
+    full_scan = args.full
+    a = myFileScan(host, custom_extion=custom_extion, verbose=verbose, full_scan=full_scan)
+    threads = []
     for i in range(int(args.threadnum)):
         thd = threading.Thread(target=a.runFuzz)
+        threads.append(thd)
         thd.setDaemon(True)
         thd.start()

     while True:
-        if threading.activeCount() <= 1:
+        count = 0
+        for thd in threads:
+            if thd.is_alive():
+                count += 1
+
+        if count == 0:
             break
         else:
             try:
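
compeleUrl is now a module-level helper: it guesses https only when the port is 443, strips a bare :// or // prefix, prepends the scheme when it is missing, and drops a trailing slash. With the function above in scope, a few illustrative inputs (made-up hosts) would normalize like this:

# Usage sketch for compeleUrl as defined above; the hosts are made-up examples.
print(compeleUrl("www.iqiyi.com"))       # http://www.iqiyi.com
print(compeleUrl("127.0.0.1:8080/"))     # http://127.0.0.1:8080
print(compeleUrl("example.com:443"))     # https://example.com:443
print(compeleUrl("//cdn.example.com/"))  # http://cdn.example.com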
@@ -323,41 +373,32 @@ def ScanApi(host, custom_extion="php", verbose=True, full_scan=False):
                 a.STOP_ME = True


-if __name__ == '__main__':
-    """
-    Usage = \"""
-    python LittleFileScan.py host ext
-
-    i.e. python LittleFileScan.py www.iqiyi.com jsp
-    i.e. python LittleFileScan.py www.iqiyi.com
-    i.e. python LittleFileScan.py 127.0.0.1:8832
-    \"""
+def batchFileScan(args):
+    custom_extion = args.ext
+    verbose = args.v
+    full_scan = args.full
+    pool = multiprocessing.Pool(4)
+    target_queue = Queue.Queue()
+    if args.f:
+        with open(args.f, "r") as f:
+            x = f.readlines()
+
+        for line in x:
+            if not check_url_alive(line.strip()):
+                continue
+            print "[batchFileScan] add \t" + line.strip()
+            target_queue.put(line.strip())

-    if len(sys.argv) > 3:
-        print Usage
-        sys.exit(-1)
-    if len(sys.argv) == 3:
-        host = sys.argv[1]
-        custom_extion = sys.argv[2]
-        ScanApi(host, custom_extion=custom_extion)
-    elif len(sys.argv) == 2:
-        host = sys.argv[1]
-        ScanApi(host)
-    else:
-        print Usage
-        sys.exit(-1)
-    """
-
-    full_scan = False
-    custom_extion ="php"
-    verbose = False
-    args = parseArgs()
-    if args.full:
-        full_scan = True
-    if args.ext:
-        custom_extion = args.ext
-    if args.v:
-        verbose = True
-    ScanApi(args.host, custom_extion=custom_extion, verbose=verbose, full_scan=full_scan)
+    while not target_queue.empty():
+        host = target_queue.get(timeout=1)
+        print "[test] [Host]:\t" + host
+        pool.apply(ScanApi, (host, args))
+
+    pool.close()
+    pool.join()
+    print 'All subprocesses done.'


+if __name__ == '__main__':
+    args = parseArgs()
+    batchFileScan(args)
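
batchFileScan drives ScanApi through a multiprocessing.Pool of four workers, but pool.apply blocks until each call returns, so the hosts end up being scanned one at a time. A minimal sketch of the same loop using apply_async, which would let the pool overlap scans (worker_scan stands in for ScanApi and hosts is a made-up target list):

# Sketch: non-blocking variant of the batch loop. worker_scan stands in for
# ScanApi; hosts is an invented target list.
import multiprocessing

def worker_scan(host):
    print("[scan] " + host)

if __name__ == '__main__':
    hosts = ["a.example.com", "b.example.com", "c.example.com"]
    pool = multiprocessing.Pool(4)
    for h in hosts:
        pool.apply_async(worker_scan, (h,))   # queue the scan without waiting
    pool.close()
    pool.join()                               # wait for all queued scans to finish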

LittleFileScan_old.py
+18 -16
@@ -113,6 +113,11 @@ def check_404(self, types="dir"):
                 self.error_location = resp.url
             else:
                 self.error_location = ""
+            # check whether the 404 marker is present
+            if self.error_pattern.findall(_content):
+                self.error_flag = True
+            else:
+                self.error_flag = False
             # if resp.status_code in [301, 302] and "Location" in resp.headers:
             #     self.error_location = resp.headers["Location"]
             self.error_content_length = len(_content)
@@ -166,27 +171,24 @@ def verifyAlive(self, dirs, types="dir", compress=False):
                     return False

                 # match the error marker directly
-                if self.error_pattern.findall(_content):
-                    return False
+                if self.error_flag:
+                    if self.error_pattern.findall(_content):
+                        return False
+                    else:
+                        return True
+
+
                 # if the 404 probe in check_404 got a response
                 if self.has_404:
                     # even when the status code is not 404, compare it with error_status_code
                     if resp.status_code == self.error_status_code:
-                        # check whether the 404 marker is present
-                        if self.error_flag:
-                            if self.error_pattern.findall(_content):
-                                return False
-                            else:
-                                return True
-                        # no 404 marker, so compare content lengths instead
+                        mins = min(self.error_content_length, len(_content))
+                        if mins == 0:
+                            mins = 10.0
+                        if abs(float(self.error_content_length, len(_content))) / mins > 0.3:
+                            return True
                         else:
-                            mins = min(self.error_content_length, len(_content))
-                            if mins == 0:
-                                mins = 10.0
-                            if abs(float(self.error_content_length, len(_content))) / mins > 0.3:
-                                return True
-                            else:
-                                return False
+                            return False

                     # nothing above matched and the status differs from error_code, so tentatively consider it True
                     else:

config.py
+1 -1
@@ -58,7 +58,7 @@
 # -------------------------------------------------

 # request timeout (seconds)
-timeout = 10
+timeout = 6

 # whether to allow URL redirects
 allow_redirects = True

config.pyc
-57 Bytes
Binary file not shown.

dict/dependents.lst
+1 -1
@@ -1,5 +1,5 @@
 %DEPEN_NAME%{re=exrex:[0-9]}$
-%DEPEN_NAME%{re=exrex:(200[0-9])|(201[0-5])}$
+%DEPEN_NAME%{re=exrex:(201[5-7])}$
 %DEPEN_NAME%{re=exrex:(!?!|!!!)|(@?@|@@@)|123}$
 %EXT%{re=exrex:(!?!|!!!)|(@?@|@@@)|123}$
 %DOMAIN%{re=exrex:(!?!|!!!)|(@?@|@@@)|123}$
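
The {re=exrex:...} placeholders in dependents.lst look like they are expanded with the exrex library, which enumerates every string matched by a regular expression. Assuming the dictionary generator treats them that way, the new year pattern expands to three values instead of the sixteen the old line covered:

# Sketch: expanding the updated year pattern with exrex (pip install exrex).
import exrex

print(list(exrex.generate("(201[5-7])")))
# ['2015', '2016', '2017']

print(len(list(exrex.generate("(200[0-9])|(201[0-5])"))))
# 16 -- the range the replaced line used to cover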

dict/directory.lst
+2 -2
@@ -1,5 +1,5 @@
-/{date=year:2010-2015}$
-/{date=year_mon:201001-201512}$
+/{date=year:2015-2017}$
+/{date=year_mon:201501-201712}$
 /a3
 /abstract
 /account
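
directory.lst shifts its date templates forward in the same spirit; assuming {date=year:A-B} and {date=year_mon:A-B} expand to every year and every year-month in an inclusive range, the two new lines cover /2015-/2017 and /201501-/201712. A rough sketch of that expansion (illustrative only, not the project's ProcessDic):

# Sketch: plain-Python expansion of the new date templates, assuming
# inclusive ranges. Not the project's own dictionary generator.
years = ["/%d" % y for y in range(2015, 2018)]
year_months = ["/%d%02d" % (y, m) for y in range(2015, 2018) for m in range(1, 13)]

print(years)             # ['/2015', '/2016', '/2017']
print(len(year_months))  # 36 paths, /201501 through /201712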

test.txt
+10
@@ -0,0 +1,10 @@
+blogs.autohome.com.cn
+platform.autohome.com.cn
+welcome.autohome.com.cn
+science.autohome.com.cn
+misto.autohome.com.cn
+event.autohome.com.cn
+services.autohome.com.cn
+video.autohome.com.cn
+media.autohome.com.cn
+stats.autohome.com.cn
