From e1238d7c0070cf95d599d92cac6a5d4dba67bfc8 Mon Sep 17 00:00:00 2001 From: Lilith River Date: Sun, 29 Sep 2024 22:51:48 -0600 Subject: [PATCH] Follow local redirects, retry some http --- check-links.js | 65 +++++++++++++++++++++++++++---------- index.js | 35 +++++++++++++++----- tests/astro.config.mjs | 3 ++ tests/integration.test.js | 2 +- tests/src/pages/index.astro | 3 +- 5 files changed, 79 insertions(+), 29 deletions(-) diff --git a/check-links.js b/check-links.js index 27bffd7..1a14c2d 100644 --- a/check-links.js +++ b/check-links.js @@ -11,7 +11,9 @@ export async function checkLinksInHtml( baseUrl, documentPath, checkedLinks = new Map(), - distPath = '' + distPath = '', + astroConfigRedirects = {}, + logger ) { const root = parse(htmlContent); const linkElements = root.querySelectorAll('a[href]'); @@ -34,18 +36,31 @@ export async function checkLinksInHtml( absoluteLink = link; } else { absoluteLink = new URL(link, "https://localhost" + baseUrl).pathname; - if (link !== absoluteLink) { - console.log('Link', link, 'was converted to', absoluteLink); - } + // if (link !== absoluteLink) { + // logger.info(`Link ${link} was resolved to ${absoluteLink}`); + // } } } catch (err) { // Invalid URL, skip - console.log('Invalid URL in', normalizePath(documentPath), link, err); + logger.error(`Invalid URL in ${normalizePath(documentPath)} ${link} ${err}`); return; } - if (checkedLinks.has(absoluteLink)) { - const isBroken = !checkedLinks.get(absoluteLink); + let fetchLink = link; + if (absoluteLink.startsWith('/') && distPath) { + fetchLink = absoluteLink; + } + + if (astroConfigRedirects[fetchLink]) { + // Check if the link is a redirect + const redirect = astroConfigRedirects[fetchLink]; + if (redirect) { + fetchLink = redirect.destination ? redirect.destination : redirect; + } + } + + if (checkedLinks.has(fetchLink)) { + const isBroken = !checkedLinks.get(fetchLink); if (isBroken) { addBrokenLink(brokenLinksMap, documentPath, link, distPath); } @@ -53,10 +68,11 @@ export async function checkLinksInHtml( } let isBroken = false; + - if (absoluteLink.startsWith('/') && distPath) { + if (fetchLink.startsWith('/') && distPath) { // Internal link in build mode, check if file exists - const relativePath = absoluteLink; + const relativePath = fetchLink; // Potential file paths to check const possiblePaths = [ path.join(distPath, relativePath), @@ -68,22 +84,35 @@ export async function checkLinksInHtml( if (!possiblePaths.some((p) => fs.existsSync(p))) { // console.log('Failed paths', possiblePaths); isBroken = true; + // Fall back to checking a redirect file if it exists. + } } else { - // External link, check via HTTP request - try { - const response = await fetch(link, { method: 'GET' }); - isBroken = !response.ok; - if (isBroken) { - console.log( response.status, ' Error fetching', link); + // External link, check via HTTP request. Retry 3 times if ECONNRESET + let retries = 0; + while (retries < 3) { + try { + const response = await fetch(fetchLink, { method: 'GET' }); + isBroken = !response.ok; + if (isBroken) { + logger.error(`${response.status} Error fetching ${fetchLink}`); + } + break; + } catch (error) { + isBroken = true; + let statusCodeNumber = error.errno == 'ENOTFOUND' ? 404 : (error.errno); + logger.error(`${statusCodeNumber} error fetching ${fetchLink}`); + if (error.errno === 'ECONNRESET') { + retries++; + continue; + } + break; } - } catch (error) { - isBroken = true; - console.log( error.errno, 'error fetching', link); } } // Cache the link's validity + checkedLinks.set(fetchLink, !isBroken); checkedLinks.set(absoluteLink, !isBroken); if (isBroken) { diff --git a/index.js b/index.js index 76c3018..f62d625 100644 --- a/index.js +++ b/index.js @@ -10,12 +10,24 @@ export default function astroBrokenLinksChecker(options = {}) { const checkedLinks = new Map(); return { + name: 'astro-broken-links-checker', hooks: { - 'astro:build:done': async ({ dir }) => { + 'astro:config:setup': async ({ config }) => { + //console.log('config.redirects', config.redirects); + // save the redirects to the options + options.astroConfigRedirects = config.redirects; + }, + + 'astro:build:done': async ({ dir, logger }) => { + + const astroConfigRedirects = options.astroConfigRedirects; + //console.log('astroConfigRedirects', astroConfigRedirects); const distPath = fileURLToPath(dir); const htmlFiles = await fastGlob('**/*.html', { cwd: distPath }); - console.log(`Checking ${htmlFiles.length} html pages for broken links`); + logger.info(`Checking ${htmlFiles.length} html pages for broken links`); + // start time + const startTime = Date.now(); const checkHtmlPromises = htmlFiles.map(async (htmlFile) => { const absoluteHtmlFilePath = join(distPath, htmlFile); const htmlContent = fs.readFileSync(absoluteHtmlFilePath, 'utf8'); @@ -26,17 +38,22 @@ export default function astroBrokenLinksChecker(options = {}) { baseUrl, absoluteHtmlFilePath, // Document path checkedLinks, - distPath + distPath, + astroConfigRedirects, + logger ); }); await Promise.all(checkHtmlPromises); - logBrokenLinks(brokenLinksMap, logFilePath); + logBrokenLinks(brokenLinksMap, logFilePath, logger); + // end time + const endTime = Date.now(); + logger.info(`Time to check links: ${endTime - startTime} ms`); }, }, }; } -function logBrokenLinks(brokenLinksMap, logFilePath) { +function logBrokenLinks(brokenLinksMap, logFilePath, logger) { if (brokenLinksMap.size > 0) { let logData = ''; for (const [brokenLink, documentsSet] of brokenLinksMap.entries()) { @@ -49,12 +66,12 @@ function logBrokenLinks(brokenLinksMap, logFilePath) { logData = logData.trim(); if (logFilePath) { fs.writeFileSync(logFilePath, logData, 'utf8'); - console.log(`Broken links have been logged to ${logFilePath}`); - console.log(logData); + logger.info(`Broken links have been logged to ${logFilePath}`); + logger.info(logData); } else { - console.log(logData); + logger.info(logData); } } else { - console.log('No broken links detected.'); + logger.info('No broken links detected.'); } } \ No newline at end of file diff --git a/tests/astro.config.mjs b/tests/astro.config.mjs index 98a39ca..716969a 100644 --- a/tests/astro.config.mjs +++ b/tests/astro.config.mjs @@ -2,6 +2,9 @@ import { defineConfig } from 'astro/config'; import astroBrokenLinksChecker from 'astro-broken-links-checker'; export default defineConfig({ + redirects: { + '/redirected': '/about', + }, integrations: [astroBrokenLinksChecker({ logFilePath: 'broken-links.log', })], diff --git a/tests/integration.test.js b/tests/integration.test.js index e311990..ca2892c 100644 --- a/tests/integration.test.js +++ b/tests/integration.test.js @@ -55,6 +55,6 @@ describe('Astro Broken Links Checker Integration', () => { expect(logContent).not.toContain('Broken link: /about'); // Expect '/about' to not be reported as broken expect(logContent).not.toContain('Broken link: /\n'); // Expect '/about' to not be reported as broken expect(logContent).not.toContain('Broken link: https://microsoft.com'); // Expect 'https://microsoft.com' to not be reported as broken - + expect(logContent).not.toContain('Broken link: /redirected'); // Expect '/redirected' to not be reported as broken }); }); diff --git a/tests/src/pages/index.astro b/tests/src/pages/index.astro index 2cc8008..c1116ed 100644 --- a/tests/src/pages/index.astro +++ b/tests/src/pages/index.astro @@ -3,4 +3,5 @@

Home Page

About Broken Link -Valid external link \ No newline at end of file +Valid external link +Redirected link \ No newline at end of file