From f570a5f01376bf66bd004efcbdcfbb5d25f36943 Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Thu, 27 Mar 2025 16:04:35 -0400 Subject: [PATCH 1/6] ref(core): Improve URL parsing utilities --- packages/core/src/fetch.ts | 72 ++++++++-------- packages/core/src/utils-hoist/index.ts | 9 +- packages/core/src/utils-hoist/url.ts | 76 ++++++++++++++++- packages/core/test/utils-hoist/url.test.ts | 95 ++++++++++++++++++++-- 4 files changed, 209 insertions(+), 43 deletions(-) diff --git a/packages/core/src/fetch.ts b/packages/core/src/fetch.ts index 650cad83effa..099114296c02 100644 --- a/packages/core/src/fetch.ts +++ b/packages/core/src/fetch.ts @@ -1,12 +1,11 @@ -/* eslint-disable complexity */ import { getClient } from './currentScopes'; import { SEMANTIC_ATTRIBUTE_SENTRY_OP, SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN } from './semanticAttributes'; import { SPAN_STATUS_ERROR, setHttpStatus, startInactiveSpan } from './tracing'; import { SentryNonRecordingSpan } from './tracing/sentryNonRecordingSpan'; -import type { FetchBreadcrumbHint, HandlerDataFetch, Span, SpanOrigin } from './types-hoist'; +import type { FetchBreadcrumbHint, HandlerDataFetch, Span, SpanAttributes, SpanOrigin } from './types-hoist'; import { SENTRY_BAGGAGE_KEY_PREFIX } from './utils-hoist/baggage'; import { isInstanceOf } from './utils-hoist/is'; -import { parseStringToURL, stripUrlQueryAndFragment } from './utils-hoist/url'; +import { isURLObjectRelative, parseStringToURLObject } from './utils-hoist/url'; import { hasSpansEnabled } from './utils/hasSpansEnabled'; import { getActiveSpan } from './utils/spanUtils'; import { getTraceData } from './utils/traceData'; @@ -54,40 +53,11 @@ export function instrumentFetchRequest( return undefined; } - // Curious about `thismessage:/`? See: https://www.rfc-editor.org/rfc/rfc2557.html - // > When the methods above do not yield an absolute URI, a base URL - // > of "thismessage:/" MUST be employed. This base URL has been - // > defined for the sole purpose of resolving relative references - // > within a multipart/related structure when no other base URI is - // > specified. - // - // We need to provide a base URL to `parseStringToURL` because the fetch API gives us a - // relative URL sometimes. - // - // This is the only case where we need to provide a base URL to `parseStringToURL` - // because the relative URL is not valid on its own. - const parsedUrl = url.startsWith('/') ? parseStringToURL(url, 'thismessage:/') : parseStringToURL(url); - const fullUrl = url.startsWith('/') ? undefined : parsedUrl?.href; - const hasParent = !!getActiveSpan(); const span = shouldCreateSpanResult && hasParent - ? startInactiveSpan({ - name: `${method} ${stripUrlQueryAndFragment(url)}`, - attributes: { - url, - type: 'fetch', - 'http.method': method, - 'http.url': parsedUrl?.href, - [SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: spanOrigin, - [SEMANTIC_ATTRIBUTE_SENTRY_OP]: 'http.client', - ...(fullUrl && { 'http.url': fullUrl }), - ...(fullUrl && parsedUrl?.host && { 'server.address': parsedUrl.host }), - ...(parsedUrl?.search && { 'http.query': parsedUrl.search }), - ...(parsedUrl?.hash && { 'http.fragment': parsedUrl.hash }), - }, - }) + ? startInactiveSpan(getSpanStartOptions(url, method, spanOrigin)) : new SentryNonRecordingSpan(); handlerData.fetchData.__span = span.spanContext().spanId; @@ -264,3 +234,39 @@ function isRequest(request: unknown): request is Request { function isHeaders(headers: unknown): headers is Headers { return typeof Headers !== 'undefined' && isInstanceOf(headers, Headers); } + +function getSpanStartOptions( + url: string, + method: string, + spanOrigin: SpanOrigin, +): Parameters[0] { + const parsedUrl = parseStringToURLObject(url); + return { + name: parsedUrl ? `${method} ${parsedUrl.pathname}` : method, + attributes: getFetchSpanAttributes(url, parsedUrl, method, spanOrigin), + }; +} + +function getFetchSpanAttributes( + url: string, + parsedUrl: ReturnType, + method: string, + spanOrigin: SpanOrigin, +): SpanAttributes { + const attributes: SpanAttributes = { + url, + type: 'fetch', + 'http.method': method, + [SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: spanOrigin, + [SEMANTIC_ATTRIBUTE_SENTRY_OP]: 'http.client', + }; + if (parsedUrl) { + if (!isURLObjectRelative(parsedUrl)) { + attributes['http.url'] = parsedUrl.href; + attributes['server.address'] = parsedUrl.host; + } + attributes['http.query'] = parsedUrl.search; + attributes['http.fragment'] = parsedUrl.hash; + } + return attributes; +} diff --git a/packages/core/src/utils-hoist/index.ts b/packages/core/src/utils-hoist/index.ts index 990ad55fcc8e..2bb15f1423dc 100644 --- a/packages/core/src/utils-hoist/index.ts +++ b/packages/core/src/utils-hoist/index.ts @@ -122,7 +122,14 @@ export { objectToBaggageHeader, } from './baggage'; -export { getSanitizedUrlString, parseUrl, stripUrlQueryAndFragment } from './url'; +export { + getSanitizedUrlString, + parseUrl, + stripUrlQueryAndFragment, + parseStringToURLObject, + isURLObjectRelative, + getSanitizedUrlStringFromUrlObject, +} from './url'; export { eventFromMessage, eventFromUnknownInput, exceptionFromError, parseStackFrames } from './eventbuilder'; export { callFrameToStackFrame, watchdogTimer } from './anr'; export { LRUMap } from './lru'; diff --git a/packages/core/src/utils-hoist/url.ts b/packages/core/src/utils-hoist/url.ts index 0b542cf14d6c..46d625f2d1e1 100644 --- a/packages/core/src/utils-hoist/url.ts +++ b/packages/core/src/utils-hoist/url.ts @@ -11,13 +11,50 @@ interface URLwithCanParse extends URL { canParse: (url: string, base?: string | URL | undefined) => boolean; } +// A subset of the URL object that is valid for relative URLs +// The URL object cannot handle relative URLs, so we need to handle them separately +type RelativeURL = { + isRelative: true; + pathname: URL['pathname']; + search: URL['search']; + hash: URL['hash']; +}; + +type URLObject = RelativeURL | URL; + +// Curious about `thismessage:/`? See: https://www.rfc-editor.org/rfc/rfc2557.html +// > When the methods above do not yield an absolute URI, a base URL +// > of "thismessage:/" MUST be employed. This base URL has been +// > defined for the sole purpose of resolving relative references +// > within a multipart/related structure when no other base URI is +// > specified. +// +// We need to provide a base URL to `parseStringToURLObject` because the fetch API gives us a +// relative URL sometimes. +// +// This is the only case where we need to provide a base URL to `parseStringToURLObject` +// because the relative URL is not valid on its own. +const DEFAULT_BASE_URL = 'thismessage:/'; + +/** + * Checks if the URL object is relative + * + * @param url - The URL object to check + * @returns True if the URL object is relative, false otherwise + */ +export function isURLObjectRelative(url: URLObject): url is RelativeURL { + return 'isRelative' in url; +} + /** * Parses string to a URL object * * @param url - The URL to parse * @returns The parsed URL object or undefined if the URL is invalid */ -export function parseStringToURL(url: string, base?: string | URL | undefined): URL | undefined { +export function parseStringToURLObject(url: string, urlBase?: string | URL | undefined): URLObject | undefined { + const isRelative = url.startsWith('/'); + const base = urlBase ?? (isRelative ? DEFAULT_BASE_URL : undefined); try { // Use `canParse` to short-circuit the URL constructor if it's not a valid URL // This is faster than trying to construct the URL and catching the error @@ -26,7 +63,17 @@ export function parseStringToURL(url: string, base?: string | URL | undefined): return undefined; } - return new URL(url, base); + const fullUrlObject = new URL(url, base); + if (isRelative) { + // Because we used a fake base URL, we need to return a relative URL object + return { + isRelative, + pathname: fullUrlObject.pathname, + search: fullUrlObject.search, + hash: fullUrlObject.hash, + }; + } + return fullUrlObject; } catch { // empty body } @@ -34,6 +81,31 @@ export function parseStringToURL(url: string, base?: string | URL | undefined): return undefined; } +/** + * Takes a URL object and returns a sanitized string which is safe to use as span name + * see: https://develop.sentry.dev/sdk/data-handling/#structuring-data + */ +export function getSanitizedUrlStringFromUrlObject(url: URLObject): string { + if (isURLObjectRelative(url)) { + return url.pathname; + } + + const newUrl = new URL(url); + newUrl.search = ''; + newUrl.hash = ''; + if (['80', '443'].includes(newUrl.port)) { + newUrl.port = ''; + } + if (newUrl.password) { + newUrl.password = '%filtered%'; + } + if (newUrl.username) { + newUrl.username = '%filtered%'; + } + + return newUrl.toString(); +} + /** * Parses string form of URL into an object * // borrowed from https://tools.ietf.org/html/rfc3986#appendix-B diff --git a/packages/core/test/utils-hoist/url.test.ts b/packages/core/test/utils-hoist/url.test.ts index 8cb0a945c2d5..14af8c5c2403 100644 --- a/packages/core/test/utils-hoist/url.test.ts +++ b/packages/core/test/utils-hoist/url.test.ts @@ -1,5 +1,12 @@ import { describe, expect, it } from 'vitest'; -import { getSanitizedUrlString, parseStringToURL, parseUrl, stripUrlQueryAndFragment } from '../../src/utils-hoist/url'; +import { + getSanitizedUrlString, + parseUrl, + stripUrlQueryAndFragment, + parseStringToURLObject, + isURLObjectRelative, + getSanitizedUrlStringFromUrlObject, +} from '../../src/utils-hoist/url'; describe('stripQueryStringAndFragment', () => { const urlString = 'http://dogs.are.great:1231/yay/'; @@ -62,8 +69,6 @@ describe('getSanitizedUrlString', () => { 'https://[filtered]:[filtered]@somedomain.com', ], ['same-origin url', '/api/v4/users?id=123', '/api/v4/users'], - ['url without a protocol', 'example.com', 'example.com'], - ['url without a protocol with a path', 'example.com/sub/path?id=123', 'example.com/sub/path'], ['url with port 8080', 'http://172.31.12.144:8080/test', 'http://172.31.12.144:8080/test'], ['url with port 4433', 'http://172.31.12.144:4433/test', 'http://172.31.12.144:4433/test'], ['url with port 443', 'http://172.31.12.144:443/test', 'http://172.31.12.144/test'], @@ -197,19 +202,95 @@ describe('parseUrl', () => { }); }); -describe('parseStringToURL', () => { +describe('parseStringToURLObject', () => { it('returns undefined for invalid URLs', () => { - expect(parseStringToURL('invalid-url')).toBeUndefined(); + expect(parseStringToURLObject('invalid-url')).toBeUndefined(); }); it('returns a URL object for valid URLs', () => { - expect(parseStringToURL('https://somedomain.com')).toBeInstanceOf(URL); + expect(parseStringToURLObject('https://somedomain.com')).toBeInstanceOf(URL); + }); + + it('returns a URL object for valid URLs with a base URL', () => { + expect(parseStringToURLObject('https://somedomain.com', 'https://base.com')).toBeInstanceOf(URL); + }); + + it('returns a relative URL object for relative URLs', () => { + expect(parseStringToURLObject('/path/to/happiness')).toEqual({ + isRelative: true, + pathname: '/path/to/happiness', + search: '', + hash: '', + }); }); it('does not throw an error if URl.canParse is not defined', () => { const canParse = (URL as any).canParse; delete (URL as any).canParse; - expect(parseStringToURL('https://somedomain.com')).toBeInstanceOf(URL); + expect(parseStringToURLObject('https://somedomain.com')).toBeInstanceOf(URL); (URL as any).canParse = canParse; }); }); + +describe('isURLObjectRelative', () => { + it('returns true for relative URLs', () => { + expect(isURLObjectRelative(parseStringToURLObject('/path/to/happiness')!)).toBe(true); + }); + + it('returns false for absolute URLs', () => { + expect(isURLObjectRelative(parseStringToURLObject('https://somedomain.com')!)).toBe(false); + }); +}); + +describe('getSanitizedUrlStringFromUrlObject', () => { + it.each([ + ['regular url', 'https://somedomain.com', 'https://somedomain.com/'], + ['regular url with a path', 'https://somedomain.com/path/to/happiness', 'https://somedomain.com/path/to/happiness'], + [ + 'url with standard http port 80', + 'http://somedomain.com:80/path/to/happiness', + 'http://somedomain.com/path/to/happiness', + ], + [ + 'url with standard https port 443', + 'https://somedomain.com:443/path/to/happiness', + 'https://somedomain.com/path/to/happiness', + ], + [ + 'url with non-standard port', + 'https://somedomain.com:4200/path/to/happiness', + 'https://somedomain.com:4200/path/to/happiness', + ], + [ + 'url with query params', + 'https://somedomain.com:4200/path/to/happiness?auhtToken=abc123¶m2=bar', + 'https://somedomain.com:4200/path/to/happiness', + ], + [ + 'url with a fragment', + 'https://somedomain.com/path/to/happiness#somewildfragment123', + 'https://somedomain.com/path/to/happiness', + ], + [ + 'url with a fragment and query params', + 'https://somedomain.com/path/to/happiness#somewildfragment123?auhtToken=abc123¶m2=bar', + 'https://somedomain.com/path/to/happiness', + ], + [ + 'url with authorization', + 'https://username:password@somedomain.com', + 'https://%filtered%:%filtered%@somedomain.com/', + ], + ['same-origin url', '/api/v4/users?id=123', '/api/v4/users'], + ['url with port 8080', 'http://172.31.12.144:8080/test', 'http://172.31.12.144:8080/test'], + ['url with port 4433', 'http://172.31.12.144:4433/test', 'http://172.31.12.144:4433/test'], + ['url with port 443', 'http://172.31.12.144:443/test', 'http://172.31.12.144/test'], + ['url with IP and port 80', 'http://172.31.12.144:80/test', 'http://172.31.12.144/test'], + ])('returns a sanitized URL for a %s', (_, rawUrl: string, sanitizedURL: string) => { + const urlObject = parseStringToURLObject(rawUrl); + if (!urlObject) { + throw new Error('Invalid URL'); + } + expect(getSanitizedUrlStringFromUrlObject(urlObject)).toEqual(sanitizedURL); + }); +}); From 0b91b93ce279413779e082cecd728792033997b6 Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Thu, 27 Mar 2025 16:42:44 -0400 Subject: [PATCH 2/6] make sure we include href --- packages/core/src/fetch.ts | 2 +- packages/core/src/utils-hoist/url.ts | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/core/src/fetch.ts b/packages/core/src/fetch.ts index 099114296c02..62d9d71798a2 100644 --- a/packages/core/src/fetch.ts +++ b/packages/core/src/fetch.ts @@ -242,7 +242,7 @@ function getSpanStartOptions( ): Parameters[0] { const parsedUrl = parseStringToURLObject(url); return { - name: parsedUrl ? `${method} ${parsedUrl.pathname}` : method, + name: parsedUrl ? `${method} ${isURLObjectRelative(parsedUrl) ? parsedUrl.pathname : parsedUrl.href}` : method, attributes: getFetchSpanAttributes(url, parsedUrl, method, spanOrigin), }; } diff --git a/packages/core/src/utils-hoist/url.ts b/packages/core/src/utils-hoist/url.ts index 46d625f2d1e1..7a7893a36b68 100644 --- a/packages/core/src/utils-hoist/url.ts +++ b/packages/core/src/utils-hoist/url.ts @@ -65,7 +65,8 @@ export function parseStringToURLObject(url: string, urlBase?: string | URL | und const fullUrlObject = new URL(url, base); if (isRelative) { - // Because we used a fake base URL, we need to return a relative URL object + // Because we used a fake base URL, we need to return a relative URL object. + // We cannot return anything about the origin, host, etc. because it will refer to the fake base URL. return { isRelative, pathname: fullUrlObject.pathname, From 8cc57de0dd1c8644e98832fc4639334a4da2a0e6 Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Thu, 27 Mar 2025 19:44:41 -0400 Subject: [PATCH 3/6] use url sanitization helper --- packages/core/src/fetch.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/core/src/fetch.ts b/packages/core/src/fetch.ts index 62d9d71798a2..1e34ace59249 100644 --- a/packages/core/src/fetch.ts +++ b/packages/core/src/fetch.ts @@ -5,7 +5,7 @@ import { SentryNonRecordingSpan } from './tracing/sentryNonRecordingSpan'; import type { FetchBreadcrumbHint, HandlerDataFetch, Span, SpanAttributes, SpanOrigin } from './types-hoist'; import { SENTRY_BAGGAGE_KEY_PREFIX } from './utils-hoist/baggage'; import { isInstanceOf } from './utils-hoist/is'; -import { isURLObjectRelative, parseStringToURLObject } from './utils-hoist/url'; +import { getSanitizedUrlStringFromUrlObject, isURLObjectRelative, parseStringToURLObject } from './utils-hoist/url'; import { hasSpansEnabled } from './utils/hasSpansEnabled'; import { getActiveSpan } from './utils/spanUtils'; import { getTraceData } from './utils/traceData'; @@ -242,7 +242,7 @@ function getSpanStartOptions( ): Parameters[0] { const parsedUrl = parseStringToURLObject(url); return { - name: parsedUrl ? `${method} ${isURLObjectRelative(parsedUrl) ? parsedUrl.pathname : parsedUrl.href}` : method, + name: parsedUrl ? `${method} ${getSanitizedUrlStringFromUrlObject(parsedUrl)}` : method, attributes: getFetchSpanAttributes(url, parsedUrl, method, spanOrigin), }; } From a2d3ae6ec2e63eb37924ad19fc9be72f228540f6 Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Fri, 28 Mar 2025 13:10:49 -0400 Subject: [PATCH 4/6] make sure we only set if defined --- packages/core/src/fetch.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/packages/core/src/fetch.ts b/packages/core/src/fetch.ts index 1e34ace59249..5f0c9cc30b56 100644 --- a/packages/core/src/fetch.ts +++ b/packages/core/src/fetch.ts @@ -265,8 +265,12 @@ function getFetchSpanAttributes( attributes['http.url'] = parsedUrl.href; attributes['server.address'] = parsedUrl.host; } - attributes['http.query'] = parsedUrl.search; - attributes['http.fragment'] = parsedUrl.hash; + if (parsedUrl.search) { + attributes['http.query'] = parsedUrl.search; + } + if (parsedUrl.hash) { + attributes['http.fragment'] = parsedUrl.hash; + } } return attributes; } From 1abc4ddb1fe08cd6dd8d5553b7e9de9f56f20a45 Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Fri, 28 Mar 2025 13:49:35 -0400 Subject: [PATCH 5/6] trailing slash --- .../test-applications/nextjs-13/tests/client/fetch.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-packages/e2e-tests/test-applications/nextjs-13/tests/client/fetch.test.ts b/dev-packages/e2e-tests/test-applications/nextjs-13/tests/client/fetch.test.ts index 4bdafbc7ed10..bd091fcfd354 100644 --- a/dev-packages/e2e-tests/test-applications/nextjs-13/tests/client/fetch.test.ts +++ b/dev-packages/e2e-tests/test-applications/nextjs-13/tests/client/fetch.test.ts @@ -43,7 +43,7 @@ test('should correctly instrument `fetch` for performance tracing', async ({ pag 'sentry.op': 'http.client', 'sentry.origin': 'auto.http.browser', }, - description: 'GET https://example.com', + description: 'GET https://example.com/', op: 'http.client', parent_span_id: expect.stringMatching(/[a-f0-9]{16}/), span_id: expect.stringMatching(/[a-f0-9]{16}/), From e389f1980235e6183a07fe7be6d17841cdf0f3fd Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Tue, 1 Apr 2025 09:36:49 +0200 Subject: [PATCH 6/6] Adjust size-limit --- .size-limit.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.size-limit.js b/.size-limit.js index 7bab8160966b..883156b803ce 100644 --- a/.size-limit.js +++ b/.size-limit.js @@ -219,7 +219,7 @@ module.exports = [ import: createImport('init'), ignore: ['$app/stores'], gzip: true, - limit: '38 KB', + limit: '38.5 KB', }, // Node SDK (ESM) {