diff --git a/internal/pkg/crawl/capture.go b/internal/pkg/crawl/capture.go index 576931c0..0e7308c8 100644 --- a/internal/pkg/crawl/capture.go +++ b/internal/pkg/crawl/capture.go @@ -470,7 +470,8 @@ func (c *Crawl) Capture(item *queue.Item) error { URLsFromXML, isSitemap, err := extractor.XML(resp, false) if err != nil { c.Log.WithFields(c.genLogFields(err, item.URL, nil)).Error("unable to extract URLs from XML") - } else { + } + if len(URLsFromXML) > 0 { if isSitemap { outlinks = append(outlinks, URLsFromXML...) } else { diff --git a/internal/pkg/crawl/extractor/xml.go b/internal/pkg/crawl/extractor/xml.go index 1f2ba2a3..2e8e60df 100644 --- a/internal/pkg/crawl/extractor/xml.go +++ b/internal/pkg/crawl/extractor/xml.go @@ -36,8 +36,10 @@ func XML(resp *http.Response, strict bool) (URLs []*url.URL, sitemap bool, err e // normal EOF break } + if err != nil { - return nil, sitemap, err + // return URLs we got so far when error occurs + return URLs, sitemap, err } switch tok := tok.(type) {