Skip to content

Commit 163a3a6

Browse files
committed
感谢七刀米羔师傅, 增加正则缓存部分,减少每次解析前的正则编译
1 parent dde1233 commit 163a3a6

File tree

1 file changed

+56
-31
lines changed

1 file changed

+56
-31
lines changed

whatweb.go

+56-31
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
1-
package whatweb
1+
package main
22

33
import (
44
"encoding/json"
55
"fmt"
66
"github.com/PuerkitoBio/goquery"
7-
"github.com/prometheus/common/log"
87
"io/ioutil"
98
"regexp"
109
"strconv"
1110
"strings"
11+
// "github.com/prometheus/common/log"
1212
)
1313

1414
/*
1515
对 https://github.com/unstppbl/gowap/blob/master/gowap.go 进行修改
1616
17-
传入以下数据即可进行分析
17+
传入以下数据即可进行分析, 已经被改为内存加载具体数据
1818
1919
( url, 响应头[list], 网页内容, js返回内容 )
2020
@@ -23,10 +23,10 @@ TODO:
2323
*/
2424

2525
// HttpData bundles the inputs for one analysis run: the page URL, the
// response headers, the HTML body and the JavaScript return content.
//
// Url is passed to analyzeURL, Html is parsed with goquery to collect script
// tags, and Headers is read under lower-case keys (Analyze looks up
// "set-cookie"; ConvHeader produces lower-cased names).
// Jsret is not read in the visible part of this file.
// NOTE(review): presumably the output of evaluating page JavaScript - confirm with callers.
type HttpData struct {
26-
Url string
27-
Headers map[string][]string
28-
Html string
29-
Jsret string
26+
Url string
27+
Headers map[string][]string
28+
Html string
29+
Jsret string
3030
}
3131

3232
type analyzeData struct {
@@ -64,12 +64,35 @@ type category struct {
6464

6565
// Wappalyzer implements the analyze method the way the original wappalyzer
// does. Init fills Apps and Categories from the apps JSON file and copies its
// JSON argument into the JSON field.
//
// Apps is keyed by the same key that Init stores in application.Name.
// HttpData is not assigned anywhere in the visible code.
// NOTE(review): JSON presumably makes Analyze return a JSON string instead of
// the raw result - the check itself is outside this view, confirm.
6666
type Wappalyzer struct {
67-
HttpData *HttpData
67+
HttpData *HttpData
6868
Apps map[string]*application
6969
Categories map[string]*category
7070
JSON bool
7171
}
7272

73+
// cache stores the parsed patterns of every application so that the analyze*
// functions do not have to call the pattern parser on each request.
// Layout: application name -> pattern kind ("url", "html", "headers",
// "cookies", "scripts") -> key returned by parsePatterns0 -> patterns.
// It is written by initPatterns and read by getPatterns.
// NOTE(review): this is a plain package-level map with no locking visible
// here; confirm that Init is never run concurrently with analysis.
var cache = make(map[string]map[string]map[string][]*pattern)
74+
75+
// getPatterns returns the cached patterns of kind typ (for example "url" or
// "html") for app, looked up by app.Name.
// If the application or the kind was never cached the result is a nil map;
// ranging over it, as the callers do, simply performs no iterations.
func getPatterns(app *application, typ string) map[string][]*pattern {
76+
return cache[app.Name][typ]
77+
}
78+
79+
// initPatterns parses all pattern sources of app once and stores the result
// in the package-level cache under app.Name, replacing any earlier entry for
// that name. Init calls it for every application it loads.
func initPatterns(app *application) {
80+
// The "url" entry is always created: app.URL is parsed without a nil check.
c := map[string]map[string][]*pattern{"url": parsePatterns0(app.URL)}
81+
// The remaining kinds are only cached when the application defines them.
if app.HTML != nil {
82+
c["html"] = parsePatterns0(app.HTML)
83+
}
84+
if app.Headers != nil {
85+
c["headers"] = parsePatterns0(app.Headers)
86+
}
87+
if app.Cookies != nil {
88+
c["cookies"] = parsePatterns0(app.Cookies)
89+
}
90+
if app.Scripts != nil {
91+
c["scripts"] = parsePatterns0(app.Scripts)
92+
}
93+
cache[app.Name] = c
94+
}
95+
7396
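// Init reads the apps JSON file at appsJSONPath, unmarshals its categories
// and applications, caches each application's patterns via initPatterns, and
// returns the populated Wappalyzer. It returns nil and the error if the file
// cannot be read or any part of it cannot be unmarshalled.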
7497
func Init(appsJSONPath string, JSON bool) (wapp *Wappalyzer, err error) {
7598
wapp = &Wappalyzer{}
@@ -78,14 +101,14 @@ func Init(appsJSONPath string, JSON bool) (wapp *Wappalyzer, err error) {
78101
//}
79102
appsFile, err := ioutil.ReadFile(appsJSONPath)
80103
if err != nil {
81-
log.Errorf("Couldn't open file at %s\n", appsJSONPath)
104+
// log.Errorf("Couldn't open file at %s\n", appsJSONPath)
82105
return nil, err
83106
}
84107

85108
temporary := &temp{}
86109
err = json.Unmarshal(appsFile, &temporary)
87110
if err != nil {
88-
log.Errorf("Couldn't unmarshal apps.json file: %s\n", err)
111+
// log.Errorf("Couldn't unmarshal apps.json file: %s\n", err)
89112
return nil, err
90113
}
91114

@@ -95,7 +118,7 @@ func Init(appsJSONPath string, JSON bool) (wapp *Wappalyzer, err error) {
95118
for k, v := range temporary.Categories {
96119
catg := &category{}
97120
if err = json.Unmarshal(*v, catg); err != nil {
98-
log.Errorf("[!] Couldn't unmarshal Categories: %s\n", err)
121+
// log.Errorf("[!] Couldn't unmarshal Categories: %s\n", err)
99122
return nil, err
100123
}
101124
wapp.Categories[k] = catg
@@ -105,10 +128,11 @@ func Init(appsJSONPath string, JSON bool) (wapp *Wappalyzer, err error) {
105128
app := &application{}
106129
app.Name = k
107130
if err = json.Unmarshal(*v, app); err != nil {
108-
log.Errorf("Couldn't unmarshal Apps: %s\n", err)
131+
// log.Errorf("Couldn't unmarshal Apps: %s\n", err)
109132
return nil, err
110133
}
111134
parseCategories(app, &wapp.Categories)
135+
initPatterns(app)
112136
wapp.Apps[k] = app
113137
}
114138
wapp.JSON = JSON
@@ -124,17 +148,17 @@ type resultApp struct {
124148
implies interface{}
125149
}
126150

127-
func (wapp *Wappalyzer) ConvHeader(headers string) (map[string][]string) {
151+
func (wapp *Wappalyzer) ConvHeader(headers string) map[string][]string {
128152
head := make(map[string][]string)
129153

130154
tmp := strings.Split(strings.TrimRight(headers, "\n"), "\n")
131155
for _, v := range tmp {
132-
if strings.HasPrefix(strings.ToLower(v), "http/1.") {
156+
if strings.HasPrefix(strings.ToLower(v), "http/") {
133157
continue
134158
}
135159
splitStr := strings.Split(v, ":")
136160
header_key := strings.ToLower(strings.Replace(splitStr[0], "_", "-", -1))
137-
header_val := strings.TrimSpace(strings.Join(splitStr[1:],""))
161+
header_val := strings.TrimSpace(strings.Join(splitStr[1:], ""))
138162

139163
head[header_key] = append(head[header_key], header_val)
140164
}
@@ -149,7 +173,7 @@ func (wapp *Wappalyzer) Analyze(httpdata *HttpData) (result interface{}, err err
149173
// analyze html script src
150174
doc, err := goquery.NewDocumentFromReader(strings.NewReader(httpdata.Html))
151175
if err != nil {
152-
log.Fatal(err)
176+
// log.Fatal(err)
153177
}
154178

155179
doc.Find("script").Each(func(i int, s *goquery.Selection) {
@@ -163,7 +187,7 @@ func (wapp *Wappalyzer) Analyze(httpdata *HttpData) (result interface{}, err err
163187
analyze headers cookie
164188
165189
two set-cookie?
166-
*/
190+
*/
167191
analyzeData.cookies = make(map[string]string)
168192
for _, cookie := range httpdata.Headers["set-cookie"] {
169193
keyValues := strings.Split(cookie, ";")
@@ -175,7 +199,6 @@ func (wapp *Wappalyzer) Analyze(httpdata *HttpData) (result interface{}, err err
175199
}
176200
}
177201
}
178-
179202
for _, app := range wapp.Apps {
180203
analyzeURL(app, httpdata.Url, &detectedApplications)
181204
if app.HTML != nil {
@@ -214,6 +237,9 @@ func (wapp *Wappalyzer) Analyze(httpdata *HttpData) (result interface{}, err err
214237
return string(j), nil
215238
}
216239

240+
//fmt.Println(res)
241+
fmt.Println(httpdata.Url, res)
242+
217243
return res, nil
218244
}
219245

@@ -261,7 +287,7 @@ type pattern struct {
261287
confidence string
262288
}
263289

264-
func parsePatterns(patterns interface{}) (result map[string][]*pattern) {
290+
func parsePatterns0(patterns interface{}) (result map[string][]*pattern) {
265291
parsed := make(map[string][]string)
266292
switch ptrn := patterns.(type) {
267293
case string:
@@ -277,7 +303,7 @@ func parsePatterns(patterns interface{}) (result map[string][]*pattern) {
277303
}
278304
parsed["main"] = slice
279305
default:
280-
log.Errorf("Unkown type in parsePatterns: %T\n", ptrn)
306+
// log.Errorf("Unkown type in parsePatterns: %T\n", ptrn)
281307
}
282308
result = make(map[string][]*pattern)
283309
for k, v := range parsed {
@@ -314,7 +340,7 @@ func parsePatterns(patterns interface{}) (result map[string][]*pattern) {
314340
}
315341

316342
func analyzeURL(app *application, url string, detectedApplications *map[string]*resultApp) {
317-
patterns := parsePatterns(app.URL)
343+
patterns := getPatterns(app, "url")
318344
for _, v := range patterns {
319345
for _, pattrn := range v {
320346
if pattrn.regex != nil && pattrn.regex.Match([]byte(url)) {
@@ -361,7 +387,7 @@ func detectVersion(app *resultApp, pattrn *pattern, value *string) {
361387
1. 如果regex为空的话, 就看headers名是否存在了
362388
*/
363389
func analyzeHeaders(app *application, headers map[string][]string, detectedApplications *map[string]*resultApp) {
364-
patterns := parsePatterns(app.Headers)
390+
patterns := getPatterns(app, "headers")
365391
for headerName, v := range patterns {
366392
headerNameLowerCase := strings.ToLower(headerName)
367393

@@ -372,7 +398,7 @@ func analyzeHeaders(app *application, headers map[string][]string, detectedAppli
372398
continue
373399
}
374400

375-
if ok && pattrn.regex == nil {
401+
if ok && pattrn.regex == nil {
376402
resApp := &resultApp{app.Name, app.Version, app.Categories, app.Excludes, app.Implies}
377403
(*detectedApplications)[resApp.Name] = resApp
378404
}
@@ -393,22 +419,24 @@ func analyzeHeaders(app *application, headers map[string][]string, detectedAppli
393419
}
394420

395421
// analyzeHTML matches the HTML patterns of app against html and, on the first
// match, records the application in *detectedApplications under app.Name.
// Nothing is added when no pattern matches or app has no HTML patterns.
func analyzeHTML(app *application, html string, detectedApplications *map[string]*resultApp) {
396-
patterns := parsePatterns(app.HTML)
422+
// Patterns come from the cache filled by initPatterns instead of being
// parsed again on every call.
patterns := getPatterns(app, "html")
397423
for _, v := range patterns {
398424
for _, pattrn := range v {
425+
399426
// Patterns without a regex are skipped.
// NOTE(review): []byte(html) converts the whole page for every pattern;
// regex.MatchString(html) would avoid the repeated conversion.
if pattrn.regex != nil && pattrn.regex.Match([]byte(html)) {
400427
// Only the first matching pattern registers the application, so
// detectVersion is called with that pattern alone.
if _, ok := (*detectedApplications)[app.Name]; !ok {
401428
resApp := &resultApp{app.Name, app.Version, app.Categories, app.Excludes, app.Implies}
402429
(*detectedApplications)[resApp.Name] = resApp
403430
detectVersion(resApp, pattrn, &html)
404431
}
405432
}
433+
406434
}
407435
}
408436
}
409437

410438
func analyzeScripts(app *application, scripts []string, detectedApplications *map[string]*resultApp) {
411-
patterns := parsePatterns(app.Scripts)
439+
patterns := getPatterns(app, "scripts")
412440
for _, v := range patterns {
413441
for _, pattrn := range v {
414442
if pattrn.regex != nil {
@@ -431,19 +459,17 @@ func analyzeScripts(app *application, scripts []string, detectedApplications *ma
431459
1. 如果regex为空的话, 就看session名是否存在了
432460
*/
433461
func analyzeCookies(app *application, cookies map[string]string, detectedApplications *map[string]*resultApp) {
434-
patterns := parsePatterns(app.Cookies)
462+
patterns := getPatterns(app, "cookies")
435463
for cookieName, v := range patterns {
436464
cookieNameLowerCase := strings.ToLower(cookieName)
437465
for _, pattrn := range v {
438-
cookie, ok := cookies[cookieNameLowerCase];
466+
cookie, ok := cookies[cookieNameLowerCase]
439467

440468
if !ok {
441469
continue
442470
}
443471

444-
//fmt.Println(cookie, cookieName, pattrn)
445-
446-
if ok && pattrn.regex == nil {
472+
if ok && pattrn.regex == nil {
447473
if _, ok := (*detectedApplications)[app.Name]; !ok {
448474
resApp := &resultApp{app.Name, app.Version, app.Categories, app.Excludes, app.Implies}
449475
(*detectedApplications)[resApp.Name] = resApp
@@ -460,4 +486,3 @@ func analyzeCookies(app *application, cookies map[string]string, detectedApplica
460486
}
461487
}
462488
}
463-

0 commit comments

Comments
 (0)