Skip to content

Commit

Permalink
Follow local redirects, retry some http
Browse files Browse the repository at this point in the history
  • Loading branch information
lilith committed Sep 30, 2024
1 parent 3b5bfd5 commit e1238d7
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 29 deletions.
65 changes: 47 additions & 18 deletions check-links.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ export async function checkLinksInHtml(
baseUrl,
documentPath,
checkedLinks = new Map(),
distPath = ''
distPath = '',
astroConfigRedirects = {},
logger
) {
const root = parse(htmlContent);
const linkElements = root.querySelectorAll('a[href]');
Expand All @@ -34,29 +36,43 @@ export async function checkLinksInHtml(
absoluteLink = link;
} else {
absoluteLink = new URL(link, "https://localhost" + baseUrl).pathname;
if (link !== absoluteLink) {
console.log('Link', link, 'was converted to', absoluteLink);
}
// if (link !== absoluteLink) {
// logger.info(`Link ${link} was resolved to ${absoluteLink}`);
// }
}
} catch (err) {
// Invalid URL, skip
console.log('Invalid URL in', normalizePath(documentPath), link, err);
logger.error(`Invalid URL in ${normalizePath(documentPath)} ${link} ${err}`);
return;
}

if (checkedLinks.has(absoluteLink)) {
const isBroken = !checkedLinks.get(absoluteLink);
let fetchLink = link;
if (absoluteLink.startsWith('/') && distPath) {
fetchLink = absoluteLink;
}

if (astroConfigRedirects[fetchLink]) {
// Check if the link is a redirect
const redirect = astroConfigRedirects[fetchLink];
if (redirect) {
fetchLink = redirect.destination ? redirect.destination : redirect;
}
}

if (checkedLinks.has(fetchLink)) {
const isBroken = !checkedLinks.get(fetchLink);
if (isBroken) {
addBrokenLink(brokenLinksMap, documentPath, link, distPath);
}
return;
}

let isBroken = false;


if (absoluteLink.startsWith('/') && distPath) {
if (fetchLink.startsWith('/') && distPath) {
// Internal link in build mode, check if file exists
const relativePath = absoluteLink;
const relativePath = fetchLink;
// Potential file paths to check
const possiblePaths = [
path.join(distPath, relativePath),
Expand All @@ -68,22 +84,35 @@ export async function checkLinksInHtml(
if (!possiblePaths.some((p) => fs.existsSync(p))) {
// console.log('Failed paths', possiblePaths);
isBroken = true;
// Fall back to checking a redirect file if it exists.

}
} else {
// External link, check via HTTP request
try {
const response = await fetch(link, { method: 'GET' });
isBroken = !response.ok;
if (isBroken) {
console.log( response.status, ' Error fetching', link);
// External link, check via HTTP request. Retry 3 times if ECONNRESET
let retries = 0;
while (retries < 3) {
try {
const response = await fetch(fetchLink, { method: 'GET' });
isBroken = !response.ok;
if (isBroken) {
logger.error(`${response.status} Error fetching ${fetchLink}`);
}
break;
} catch (error) {
isBroken = true;
let statusCodeNumber = error.errno == 'ENOTFOUND' ? 404 : (error.errno);
logger.error(`${statusCodeNumber} error fetching ${fetchLink}`);
if (error.errno === 'ECONNRESET') {
retries++;
continue;
}
break;
}
} catch (error) {
isBroken = true;
console.log( error.errno, 'error fetching', link);
}
}

// Cache the link's validity
checkedLinks.set(fetchLink, !isBroken);
checkedLinks.set(absoluteLink, !isBroken);

if (isBroken) {
Expand Down
35 changes: 26 additions & 9 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,24 @@ export default function astroBrokenLinksChecker(options = {}) {
const checkedLinks = new Map();

return {

name: 'astro-broken-links-checker',
hooks: {
'astro:build:done': async ({ dir }) => {
'astro:config:setup': async ({ config }) => {
//console.log('config.redirects', config.redirects);
// save the redirects to the options
options.astroConfigRedirects = config.redirects;
},

'astro:build:done': async ({ dir, logger }) => {

const astroConfigRedirects = options.astroConfigRedirects;
//console.log('astroConfigRedirects', astroConfigRedirects);
const distPath = fileURLToPath(dir);
const htmlFiles = await fastGlob('**/*.html', { cwd: distPath });
console.log(`Checking ${htmlFiles.length} html pages for broken links`);
logger.info(`Checking ${htmlFiles.length} html pages for broken links`);
// start time
const startTime = Date.now();
const checkHtmlPromises = htmlFiles.map(async (htmlFile) => {
const absoluteHtmlFilePath = join(distPath, htmlFile);
const htmlContent = fs.readFileSync(absoluteHtmlFilePath, 'utf8');
Expand All @@ -26,17 +38,22 @@ export default function astroBrokenLinksChecker(options = {}) {
baseUrl,
absoluteHtmlFilePath, // Document path
checkedLinks,
distPath
distPath,
astroConfigRedirects,
logger
);
});
await Promise.all(checkHtmlPromises);
logBrokenLinks(brokenLinksMap, logFilePath);
logBrokenLinks(brokenLinksMap, logFilePath, logger);
// end time
const endTime = Date.now();
logger.info(`Time to check links: ${endTime - startTime} ms`);
},
},
};
}

function logBrokenLinks(brokenLinksMap, logFilePath) {
function logBrokenLinks(brokenLinksMap, logFilePath, logger) {
if (brokenLinksMap.size > 0) {
let logData = '';
for (const [brokenLink, documentsSet] of brokenLinksMap.entries()) {
Expand All @@ -49,12 +66,12 @@ function logBrokenLinks(brokenLinksMap, logFilePath) {
logData = logData.trim();
if (logFilePath) {
fs.writeFileSync(logFilePath, logData, 'utf8');
console.log(`Broken links have been logged to ${logFilePath}`);
console.log(logData);
logger.info(`Broken links have been logged to ${logFilePath}`);
logger.info(logData);
} else {
console.log(logData);
logger.info(logData);
}
} else {
console.log('No broken links detected.');
logger.info('No broken links detected.');
}
}
3 changes: 3 additions & 0 deletions tests/astro.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ import { defineConfig } from 'astro/config';
import astroBrokenLinksChecker from 'astro-broken-links-checker';

export default defineConfig({
redirects: {
'/redirected': '/about',
},
integrations: [astroBrokenLinksChecker({
logFilePath: 'broken-links.log',
})],
Expand Down
2 changes: 1 addition & 1 deletion tests/integration.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,6 @@ describe('Astro Broken Links Checker Integration', () => {
expect(logContent).not.toContain('Broken link: /about'); // Expect '/about' to not be reported as broken
expect(logContent).not.toContain('Broken link: /\n'); // Expect the root link '/' to not be reported as broken
expect(logContent).not.toContain('Broken link: https://microsoft.com'); // Expect 'https://microsoft.com' to not be reported as broken

expect(logContent).not.toContain('Broken link: /redirected'); // Expect '/redirected' to not be reported as broken
});
});
3 changes: 2 additions & 1 deletion tests/src/pages/index.astro
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
<h1>Home Page</h1>
<a href="/about">About</a>
<a href="/non-existent-page">Broken Link</a>
<a href="https://microsoft.com">Valid external link</a>
<a href="https://microsoft.com">Valid external link</a>
<a href="/redirected">Redirected link</a>

0 comments on commit e1238d7

Please sign in to comment.