Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ref(core): Improve URL parsing utilities #15882

Merged
merged 7 commits into from
Apr 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .size-limit.js
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ module.exports = [
import: createImport('init'),
ignore: ['$app/stores'],
gzip: true,
limit: '38 KB',
limit: '38.5 KB',
},
// Node SDK (ESM)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ test('should correctly instrument `fetch` for performance tracing', async ({ pag
'sentry.op': 'http.client',
'sentry.origin': 'auto.http.browser',
},
description: 'GET https://example.com',
description: 'GET https://example.com/',
op: 'http.client',
parent_span_id: expect.stringMatching(/[a-f0-9]{16}/),
span_id: expect.stringMatching(/[a-f0-9]{16}/),
Expand Down
76 changes: 43 additions & 33 deletions packages/core/src/fetch.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
/* eslint-disable complexity */
import { getClient } from './currentScopes';
import { SEMANTIC_ATTRIBUTE_SENTRY_OP, SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN } from './semanticAttributes';
import { SPAN_STATUS_ERROR, setHttpStatus, startInactiveSpan } from './tracing';
import { SentryNonRecordingSpan } from './tracing/sentryNonRecordingSpan';
import type { FetchBreadcrumbHint, HandlerDataFetch, Span, SpanOrigin } from './types-hoist';
import type { FetchBreadcrumbHint, HandlerDataFetch, Span, SpanAttributes, SpanOrigin } from './types-hoist';
import { SENTRY_BAGGAGE_KEY_PREFIX } from './utils-hoist/baggage';
import { isInstanceOf } from './utils-hoist/is';
import { parseStringToURL, stripUrlQueryAndFragment } from './utils-hoist/url';
import { getSanitizedUrlStringFromUrlObject, isURLObjectRelative, parseStringToURLObject } from './utils-hoist/url';
import { hasSpansEnabled } from './utils/hasSpansEnabled';
import { getActiveSpan } from './utils/spanUtils';
import { getTraceData } from './utils/traceData';
Expand Down Expand Up @@ -54,40 +53,11 @@ export function instrumentFetchRequest(
return undefined;
}

// Curious about `thismessage:/`? See: https://www.rfc-editor.org/rfc/rfc2557.html
// > When the methods above do not yield an absolute URI, a base URL
// > of "thismessage:/" MUST be employed. This base URL has been
// > defined for the sole purpose of resolving relative references
// > within a multipart/related structure when no other base URI is
// > specified.
//
// We need to provide a base URL to `parseStringToURL` because the fetch API gives us a
// relative URL sometimes.
//
// This is the only case where we need to provide a base URL to `parseStringToURL`
// because the relative URL is not valid on its own.
const parsedUrl = url.startsWith('/') ? parseStringToURL(url, 'thismessage:/') : parseStringToURL(url);
const fullUrl = url.startsWith('/') ? undefined : parsedUrl?.href;

const hasParent = !!getActiveSpan();

const span =
shouldCreateSpanResult && hasParent
? startInactiveSpan({
name: `${method} ${stripUrlQueryAndFragment(url)}`,
attributes: {
url,
type: 'fetch',
'http.method': method,
'http.url': parsedUrl?.href,
[SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: spanOrigin,
[SEMANTIC_ATTRIBUTE_SENTRY_OP]: 'http.client',
...(fullUrl && { 'http.url': fullUrl }),
...(fullUrl && parsedUrl?.host && { 'server.address': parsedUrl.host }),
...(parsedUrl?.search && { 'http.query': parsedUrl.search }),
...(parsedUrl?.hash && { 'http.fragment': parsedUrl.hash }),
},
})
? startInactiveSpan(getSpanStartOptions(url, method, spanOrigin))
: new SentryNonRecordingSpan();

handlerData.fetchData.__span = span.spanContext().spanId;
Expand Down Expand Up @@ -264,3 +234,43 @@ function isRequest(request: unknown): request is Request {
function isHeaders(headers: unknown): headers is Headers {
return typeof Headers !== 'undefined' && isInstanceOf(headers, Headers);
}

function getSpanStartOptions(
url: string,
method: string,
spanOrigin: SpanOrigin,
): Parameters<typeof startInactiveSpan>[0] {
const parsedUrl = parseStringToURLObject(url);
return {
name: parsedUrl ? `${method} ${getSanitizedUrlStringFromUrlObject(parsedUrl)}` : method,
attributes: getFetchSpanAttributes(url, parsedUrl, method, spanOrigin),
};
}

function getFetchSpanAttributes(
url: string,
parsedUrl: ReturnType<typeof parseStringToURLObject>,
method: string,
spanOrigin: SpanOrigin,
): SpanAttributes {
const attributes: SpanAttributes = {
url,
type: 'fetch',
'http.method': method,
[SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: spanOrigin,
[SEMANTIC_ATTRIBUTE_SENTRY_OP]: 'http.client',
};
if (parsedUrl) {
if (!isURLObjectRelative(parsedUrl)) {
attributes['http.url'] = parsedUrl.href;
attributes['server.address'] = parsedUrl.host;
}
if (parsedUrl.search) {
attributes['http.query'] = parsedUrl.search;
}
if (parsedUrl.hash) {
attributes['http.fragment'] = parsedUrl.hash;
}
}
return attributes;
}
9 changes: 8 additions & 1 deletion packages/core/src/utils-hoist/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,14 @@ export {
objectToBaggageHeader,
} from './baggage';

export { getSanitizedUrlString, parseUrl, stripUrlQueryAndFragment } from './url';
export {
getSanitizedUrlString,
parseUrl,
stripUrlQueryAndFragment,
parseStringToURLObject,
isURLObjectRelative,
getSanitizedUrlStringFromUrlObject,
} from './url';
export { eventFromMessage, eventFromUnknownInput, exceptionFromError, parseStackFrames } from './eventbuilder';
export { callFrameToStackFrame, watchdogTimer } from './anr';
export { LRUMap } from './lru';
Expand Down
77 changes: 75 additions & 2 deletions packages/core/src/utils-hoist/url.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,50 @@ interface URLwithCanParse extends URL {
canParse: (url: string, base?: string | URL | undefined) => boolean;
}

// A subset of the URL object that is valid for relative URLs
// The URL object cannot handle relative URLs, so we need to handle them separately
type RelativeURL = {
isRelative: true;
pathname: URL['pathname'];
search: URL['search'];
hash: URL['hash'];
};

type URLObject = RelativeURL | URL;

// Curious about `thismessage:/`? See: https://www.rfc-editor.org/rfc/rfc2557.html
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dayum

// > When the methods above do not yield an absolute URI, a base URL
// > of "thismessage:/" MUST be employed. This base URL has been
// > defined for the sole purpose of resolving relative references
// > within a multipart/related structure when no other base URI is
// > specified.
//
// We need to provide a base URL to `parseStringToURLObject` because the fetch API gives us a
// relative URL sometimes.
//
// This is the only case where we need to provide a base URL to `parseStringToURLObject`
// because the relative URL is not valid on its own.
const DEFAULT_BASE_URL = 'thismessage:/';

/**
* Checks if the URL object is relative
*
* @param url - The URL object to check
* @returns True if the URL object is relative, false otherwise
*/
export function isURLObjectRelative(url: URLObject): url is RelativeURL {
return 'isRelative' in url;
}

/**
* Parses string to a URL object
*
* @param url - The URL to parse
* @returns The parsed URL object or undefined if the URL is invalid
*/
export function parseStringToURL(url: string, base?: string | URL | undefined): URL | undefined {
export function parseStringToURLObject(url: string, urlBase?: string | URL | undefined): URLObject | undefined {
const isRelative = url.startsWith('/');
const base = urlBase ?? (isRelative ? DEFAULT_BASE_URL : undefined);
try {
// Use `canParse` to short-circuit the URL constructor if it's not a valid URL
// This is faster than trying to construct the URL and catching the error
Expand All @@ -26,14 +63,50 @@ export function parseStringToURL(url: string, base?: string | URL | undefined):
return undefined;
}

return new URL(url, base);
const fullUrlObject = new URL(url, base);
if (isRelative) {
// Because we used a fake base URL, we need to return a relative URL object.
// We cannot return anything about the origin, host, etc. because it will refer to the fake base URL.
return {
isRelative,
pathname: fullUrlObject.pathname,
search: fullUrlObject.search,
hash: fullUrlObject.hash,
};
}
return fullUrlObject;
} catch {
// empty body
}

return undefined;
}

/**
* Takes a URL object and returns a sanitized string which is safe to use as span name
* see: https://develop.sentry.dev/sdk/data-handling/#structuring-data
*/
export function getSanitizedUrlStringFromUrlObject(url: URLObject): string {
if (isURLObjectRelative(url)) {
return url.pathname;
}

const newUrl = new URL(url);
newUrl.search = '';
newUrl.hash = '';
if (['80', '443'].includes(newUrl.port)) {
newUrl.port = '';
}
if (newUrl.password) {
newUrl.password = '%filtered%';
}
if (newUrl.username) {
newUrl.username = '%filtered%';
}

return newUrl.toString();
}

/**
* Parses string form of URL into an object
* // borrowed from https://tools.ietf.org/html/rfc3986#appendix-B
Expand Down
95 changes: 88 additions & 7 deletions packages/core/test/utils-hoist/url.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
import { describe, expect, it } from 'vitest';
import { getSanitizedUrlString, parseStringToURL, parseUrl, stripUrlQueryAndFragment } from '../../src/utils-hoist/url';
import {
getSanitizedUrlString,
parseUrl,
stripUrlQueryAndFragment,
parseStringToURLObject,
isURLObjectRelative,
getSanitizedUrlStringFromUrlObject,
} from '../../src/utils-hoist/url';

describe('stripQueryStringAndFragment', () => {
const urlString = 'http://dogs.are.great:1231/yay/';
Expand Down Expand Up @@ -62,8 +69,6 @@ describe('getSanitizedUrlString', () => {
'https://[filtered]:[filtered]@somedomain.com',
],
['same-origin url', '/api/v4/users?id=123', '/api/v4/users'],
['url without a protocol', 'example.com', 'example.com'],
['url without a protocol with a path', 'example.com/sub/path?id=123', 'example.com/sub/path'],
['url with port 8080', 'http://172.31.12.144:8080/test', 'http://172.31.12.144:8080/test'],
['url with port 4433', 'http://172.31.12.144:4433/test', 'http://172.31.12.144:4433/test'],
['url with port 443', 'http://172.31.12.144:443/test', 'http://172.31.12.144/test'],
Expand Down Expand Up @@ -197,19 +202,95 @@ describe('parseUrl', () => {
});
});

describe('parseStringToURL', () => {
describe('parseStringToURLObject', () => {
it('returns undefined for invalid URLs', () => {
expect(parseStringToURL('invalid-url')).toBeUndefined();
expect(parseStringToURLObject('invalid-url')).toBeUndefined();
});

it('returns a URL object for valid URLs', () => {
expect(parseStringToURL('https://somedomain.com')).toBeInstanceOf(URL);
expect(parseStringToURLObject('https://somedomain.com')).toBeInstanceOf(URL);
});

it('returns a URL object for valid URLs with a base URL', () => {
expect(parseStringToURLObject('https://somedomain.com', 'https://base.com')).toBeInstanceOf(URL);
});

it('returns a relative URL object for relative URLs', () => {
expect(parseStringToURLObject('/path/to/happiness')).toEqual({
isRelative: true,
pathname: '/path/to/happiness',
search: '',
hash: '',
});
});

it('does not throw an error if URl.canParse is not defined', () => {
const canParse = (URL as any).canParse;
delete (URL as any).canParse;
expect(parseStringToURL('https://somedomain.com')).toBeInstanceOf(URL);
expect(parseStringToURLObject('https://somedomain.com')).toBeInstanceOf(URL);
(URL as any).canParse = canParse;
});
});

describe('isURLObjectRelative', () => {
it('returns true for relative URLs', () => {
expect(isURLObjectRelative(parseStringToURLObject('/path/to/happiness')!)).toBe(true);
});

it('returns false for absolute URLs', () => {
expect(isURLObjectRelative(parseStringToURLObject('https://somedomain.com')!)).toBe(false);
});
});

describe('getSanitizedUrlStringFromUrlObject', () => {
it.each([
['regular url', 'https://somedomain.com', 'https://somedomain.com/'],
['regular url with a path', 'https://somedomain.com/path/to/happiness', 'https://somedomain.com/path/to/happiness'],
[
'url with standard http port 80',
'http://somedomain.com:80/path/to/happiness',
'http://somedomain.com/path/to/happiness',
],
[
'url with standard https port 443',
'https://somedomain.com:443/path/to/happiness',
'https://somedomain.com/path/to/happiness',
],
[
'url with non-standard port',
'https://somedomain.com:4200/path/to/happiness',
'https://somedomain.com:4200/path/to/happiness',
],
[
'url with query params',
'https://somedomain.com:4200/path/to/happiness?auhtToken=abc123&param2=bar',
'https://somedomain.com:4200/path/to/happiness',
],
[
'url with a fragment',
'https://somedomain.com/path/to/happiness#somewildfragment123',
'https://somedomain.com/path/to/happiness',
],
[
'url with a fragment and query params',
'https://somedomain.com/path/to/happiness#somewildfragment123?auhtToken=abc123&param2=bar',
'https://somedomain.com/path/to/happiness',
],
[
'url with authorization',
'https://username:[email protected]',
'https://%filtered%:%filtered%@somedomain.com/',
],
['same-origin url', '/api/v4/users?id=123', '/api/v4/users'],
['url with port 8080', 'http://172.31.12.144:8080/test', 'http://172.31.12.144:8080/test'],
['url with port 4433', 'http://172.31.12.144:4433/test', 'http://172.31.12.144:4433/test'],
['url with port 443', 'http://172.31.12.144:443/test', 'http://172.31.12.144/test'],
['url with IP and port 80', 'http://172.31.12.144:80/test', 'http://172.31.12.144/test'],
])('returns a sanitized URL for a %s', (_, rawUrl: string, sanitizedURL: string) => {
const urlObject = parseStringToURLObject(rawUrl);
if (!urlObject) {
throw new Error('Invalid URL');
}
expect(getSanitizedUrlStringFromUrlObject(urlObject)).toEqual(sanitizedURL);
});
});
Loading