-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrawl.go
38 lines (29 loc) · 1.15 KB
/
crawl.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
package main
import (
"flag"
"fmt"
"github.com/jjmschofield/gocrawl/internal/crawl"
"log"
"net/url"
"time"
)
// main parses command-line flags, crawls the target URL, and prints the
// resulting counters along with the elapsed wall-clock time.
func main() {
	crawlUrlRaw := flag.String("url", "https://monzo.com", "an absolute url, including protocol and hostname")
	workerCount := flag.Int("workers", 50, "Number of crawl workers to run")
	outFilePath := flag.String("dir", "data", "A relative file path to send results to")
	flag.Parse()

	crawlUrl, err := url.Parse(*crawlUrlRaw)
	if err != nil {
		// Fatal (not Panic) is conventional for unrecoverable CLI startup
		// errors; nothing is deferred here, so no cleanup is skipped.
		log.Fatal(err)
	}

	start := time.Now()
	counters := Crawl(*crawlUrl, *workerCount, *outFilePath)

	// time.Since(...).Milliseconds() replaces the hand-rolled
	// UnixNano subtraction/division — same value, idiomatic form.
	fmt.Printf("Scrape Completed in %v ms \n", time.Since(start).Milliseconds())
	fmt.Printf(" Discovered: %v, \n Crawled: %v \n Parallel Crawls Peak: %v \n Scrape Queue Peak: %v \n Processing Peak: %v \n", counters.Discovered.Count(), counters.CrawlComplete.Count(), counters.Crawling.Peak(), counters.CrawlsQueued.Peak(), counters.Processing.Peak())
}
// Crawl runs a crawl starting at crawlUrl with workerCount workers, writing
// output under outFilePath, and returns the counters gathered by the crawler.
func Crawl(crawlUrl url.URL, workerCount int, outFilePath string) crawl.Counters {
	pageCrawler := crawl.NewDefaultPageCrawler(workerCount, outFilePath)
	return pageCrawler.Crawl(crawlUrl)
}