Skip to content

Commit

Permalink
xml: return the URLs we got so far when an error occurs
Browse files Browse the repository at this point in the history
  • Loading branch information
yzqzss authored and CorentinB committed Nov 12, 2024
1 parent 840132c commit 07fd5fe
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 2 deletions.
3 changes: 2 additions & 1 deletion internal/pkg/crawl/capture.go
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,8 @@ func (c *Crawl) Capture(item *queue.Item) error {
URLsFromXML, isSitemap, err := extractor.XML(resp, false)
if err != nil {
c.Log.WithFields(c.genLogFields(err, item.URL, nil)).Error("unable to extract URLs from XML")
- } else {
+ }
+ if len(URLsFromXML) > 0 {
if isSitemap {
outlinks = append(outlinks, URLsFromXML...)
} else {
Expand Down
4 changes: 3 additions & 1 deletion internal/pkg/crawl/extractor/xml.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,10 @@ func XML(resp *http.Response, strict bool) (URLs []*url.URL, sitemap bool, err e
// normal EOF
break
}
+
if err != nil {
- return nil, sitemap, err
+ // return URLs we got so far when error occurs
+ return URLs, sitemap, err
}

switch tok := tok.(type) {
Expand Down

0 comments on commit 07fd5fe

Please sign in to comment.