From b5b7cb0438296cd6554cd42504a60661e07ddc86 Mon Sep 17 00:00:00 2001 From: David Ichim Date: Sat, 15 Feb 2025 22:05:00 +0200 Subject: [PATCH 01/13] fix(smart-apply): stripping of markdown code blocks --- lib/shared/src/chat/preamble.ts | 2 +- .../src/edit/output/response-transformer.ts | 24 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/lib/shared/src/chat/preamble.ts b/lib/shared/src/chat/preamble.ts index c21f8d97197f..454bb041aa9a 100644 --- a/lib/shared/src/chat/preamble.ts +++ b/lib/shared/src/chat/preamble.ts @@ -9,7 +9,7 @@ const DEFAULT_PREAMBLE = ps`You are Cody, an AI coding assistant from Sourcegrap * produce code blocks that we can associate executable commands or content with existing file paths. * We want to read these file paths to support applying code directly to files from chat for Smart Apply. */ -const SMART_APPLY_PREAMBLE = ps`If your answer contains fenced code blocks in Markdown, include the relevant full file path in the code block tag using this structure: \`\`\`$LANGUAGE:$FILEPATH\`\`\` +const SMART_APPLY_PREAMBLE = ps`If your answer contains fenced code blocks in Markdown, include the relevant full file path in the code block tag using this structure: \`\`\`\`$LANGUAGE:$FILEPATH\`\`\`\` For executable terminal commands: enclose each command in individual "bash" language code block without comments and new lines inside.` const CHAT_PREAMBLE = DEFAULT_PREAMBLE.concat(SMART_APPLY_PREAMBLE) diff --git a/vscode/src/edit/output/response-transformer.ts b/vscode/src/edit/output/response-transformer.ts index 3555da365661..6b61d52d625d 100644 --- a/vscode/src/edit/output/response-transformer.ts +++ b/vscode/src/edit/output/response-transformer.ts @@ -21,14 +21,14 @@ const PROMPT_TOPIC_REGEX = new RegExp( * Regular expressions to identify markdown code blocks, and then strip the start and end delimiters. * Important for compatibility with different chat models, due to most chat models being trained to output Markdown. */ -const MARKDOWN_CODE_BLOCK_DELIMITER_START = '```(?:\\w+)?' -const MARKDOWN_CODE_BLOCK_DELIMITER_END = '```' -const MARKDOWN_CODE_BLOCK_START = new RegExp(`^${MARKDOWN_CODE_BLOCK_DELIMITER_START}`) -const MARKDOWN_CODE_BLOCK_END = new RegExp(`${MARKDOWN_CODE_BLOCK_DELIMITER_END}$`) -const MARKDOWN_CODE_BLOCK_REGEX = new RegExp( - `${MARKDOWN_CODE_BLOCK_DELIMITER_START}\\s*([\\s\\S]*?)\\s*${MARKDOWN_CODE_BLOCK_DELIMITER_END}`, - 'g' -) +// const MARKDOWN_CODE_BLOCK_DELIMITER_START = '```(?:\\w+)?' +// const MARKDOWN_CODE_BLOCK_DELIMITER_END = '```' +// const MARKDOWN_CODE_BLOCK_START = new RegExp(`^${MARKDOWN_CODE_BLOCK_DELIMITER_START}`) +// const MARKDOWN_CODE_BLOCK_END = new RegExp(`${MARKDOWN_CODE_BLOCK_DELIMITER_END}$`) +// const MARKDOWN_CODE_BLOCK_REGEX = new RegExp( +// `${MARKDOWN_CODE_BLOCK_DELIMITER_START}\\s*([\\s\\S]*?)\\s*${MARKDOWN_CODE_BLOCK_DELIMITER_END}`, +// 'g' +// ) const LEADING_SPACES_AND_NEW_LINES = /^\s*\n/ const LEADING_SPACES = /^[ ]+/ @@ -46,10 +46,10 @@ export function responseTransformer( const strippedText = text // Strip specific XML tags referenced in the prompt, e.g. .replaceAll(PROMPT_TOPIC_REGEX, '') - // Strip Markdown syntax for code blocks, e.g. ```typescript. - .replaceAll(MARKDOWN_CODE_BLOCK_REGEX, block => - block.replace(MARKDOWN_CODE_BLOCK_START, '').replace(MARKDOWN_CODE_BLOCK_END, '') - ) + // Strip Markdown syntax for code blocks, e.g. ```typescript. + // .replaceAll(MARKDOWN_CODE_BLOCK_REGEX, block => + // block.replace(MARKDOWN_CODE_BLOCK_START, '').replace(MARKDOWN_CODE_BLOCK_END, '') + // ) // Trim leading spaces // - For `add` insertions, the LLM will attempt to continue the code from the position of the cursor, we handle the `insertionPoint` From a1815143cbbd7a0aefbad91f922bca3b86ee900e Mon Sep 17 00:00:00 2001 From: David Ichim Date: Sun, 16 Feb 2025 14:46:11 +0200 Subject: [PATCH 02/13] fix(chat): ensure code block formatting is preserved and closed properly Add instructions in the chat preamble to close fenced code blocks correctly and introduce a transform that wraps code blocks starting with ```markdown in backticks to preserve their formatting in the UI when it contains other code blocks. --- lib/shared/src/chat/preamble.ts | 3 ++- vscode/webviews/components/MarkdownFromCody.tsx | 17 ++++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/lib/shared/src/chat/preamble.ts b/lib/shared/src/chat/preamble.ts index 454bb041aa9a..ee97a9958b55 100644 --- a/lib/shared/src/chat/preamble.ts +++ b/lib/shared/src/chat/preamble.ts @@ -9,7 +9,8 @@ const DEFAULT_PREAMBLE = ps`You are Cody, an AI coding assistant from Sourcegrap * produce code blocks that we can associate executable commands or content with existing file paths. * We want to read these file paths to support applying code directly to files from chat for Smart Apply. */ -const SMART_APPLY_PREAMBLE = ps`If your answer contains fenced code blocks in Markdown, include the relevant full file path in the code block tag using this structure: \`\`\`\`$LANGUAGE:$FILEPATH\`\`\`\` +const SMART_APPLY_PREAMBLE = ps`If your answer contains fenced code blocks in Markdown, include the relevant full file path in the code block tag using this structure: \`\`\`\`$LANGUAGE:$FILEPATH\`\`\`\`. +If you started a fenced code block, ensure you close it with the same number of backticks before the end of your message. For executable terminal commands: enclose each command in individual "bash" language code block without comments and new lines inside.` const CHAT_PREAMBLE = DEFAULT_PREAMBLE.concat(SMART_APPLY_PREAMBLE) diff --git a/vscode/webviews/components/MarkdownFromCody.tsx b/vscode/webviews/components/MarkdownFromCody.tsx index dc952a29961f..8796653aa3e2 100644 --- a/vscode/webviews/components/MarkdownFromCody.tsx +++ b/vscode/webviews/components/MarkdownFromCody.tsx @@ -81,16 +81,31 @@ const URL_PROCESSORS: Partial> = { [CodyIDE.VSCode]: wrapLinksWithCodyOpenCommand, } +/** + * Transforms the children string by wrapping it in backticks if it starts with '```markdown'. + * This is used to preserve the formatting of Markdown code blocks within the Markdown content. + * + * @param children - The string to transform. + * @returns The transformed string. + */ +const childrenTransform = (children: string): string => { + if (children.startsWith('```markdown')) { + return '`' + children + '`' + } + return children +} + export const MarkdownFromCody: FunctionComponent<{ className?: string; children: string }> = ({ className, children, }) => { const clientType = useConfig().clientCapabilities.agentIDE const urlTransform = useMemo(() => URL_PROCESSORS[clientType] ?? defaultUrlProcessor, [clientType]) + const chatReplyTransformed = childrenTransform(children) return ( - {children} + {chatReplyTransformed} ) } From 66b3bc46f0e236d9e41ce8ae6768983de7088ec6 Mon Sep 17 00:00:00 2001 From: David Ichim Date: Sun, 16 Feb 2025 21:13:40 +0200 Subject: [PATCH 03/13] fix(test-fixtures): comment out markdown syntax test cases - After removing the filtering having these tests means they fail --- vscode/src/edit/output/test-fixtures.ts | 40 ++++++++++++------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/vscode/src/edit/output/test-fixtures.ts b/vscode/src/edit/output/test-fixtures.ts index 6c61ab109ffe..4165f3dfeafc 100644 --- a/vscode/src/edit/output/test-fixtures.ts +++ b/vscode/src/edit/output/test-fixtures.ts @@ -29,26 +29,26 @@ export const RESPONSE_TEST_FIXTURES: Record = { expected: CLEAN_RESPONSE, task: DEFAULT_TASK, }, - withMarkdownSyntax: { - response: '```\n' + CLEAN_RESPONSE + '```', - expected: CLEAN_RESPONSE, - task: DEFAULT_TASK, - }, - withMarkdownSyntaxAndLang: { - response: '```typescript\n' + CLEAN_RESPONSE + '```', - expected: CLEAN_RESPONSE, - task: DEFAULT_TASK, - }, - withMarkdownSyntaxAndTags: { - response: - '```\n' + - `<${PROMPT_TOPICS.OUTPUT}>` + - CLEAN_RESPONSE + - `` + - '```', - expected: CLEAN_RESPONSE, - task: DEFAULT_TASK, - }, + // withMarkdownSyntax: { + // response: '```\n' + CLEAN_RESPONSE + '```', + // expected: CLEAN_RESPONSE, + // task: DEFAULT_TASK, + // }, + // withMarkdownSyntaxAndLang: { + // response: '```typescript\n' + CLEAN_RESPONSE + '```', + // expected: CLEAN_RESPONSE, + // task: DEFAULT_TASK, + // }, + // withMarkdownSyntaxAndTags: { + // response: + // '```\n' + + // `<${PROMPT_TOPICS.OUTPUT}>` + + // CLEAN_RESPONSE + + // `` + + // '```', + // expected: CLEAN_RESPONSE, + // task: DEFAULT_TASK, + // }, withHtmlEntities: { response: CLEAN_RESPONSE.replace(//g, '>'), expected: CLEAN_RESPONSE, From 41d97d2a06060c42ef0abe9bc63a2adbf6781254 Mon Sep 17 00:00:00 2001 From: David Ichim Date: Sun, 16 Feb 2025 21:28:56 +0200 Subject: [PATCH 04/13] change(chat): export CHAT_PREAMBLE for use in prompt tests --- lib/shared/src/chat/preamble.ts | 2 +- vscode/src/chat/chat-view/prompt.test.ts | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/lib/shared/src/chat/preamble.ts b/lib/shared/src/chat/preamble.ts index ee97a9958b55..f7c54cef1ba9 100644 --- a/lib/shared/src/chat/preamble.ts +++ b/lib/shared/src/chat/preamble.ts @@ -13,7 +13,7 @@ const SMART_APPLY_PREAMBLE = ps`If your answer contains fenced code blocks in Ma If you started a fenced code block, ensure you close it with the same number of backticks before the end of your message. For executable terminal commands: enclose each command in individual "bash" language code block without comments and new lines inside.` -const CHAT_PREAMBLE = DEFAULT_PREAMBLE.concat(SMART_APPLY_PREAMBLE) +export const CHAT_PREAMBLE = DEFAULT_PREAMBLE.concat(SMART_APPLY_PREAMBLE) export function getSimplePreamble( model: ChatModel | EditModel | undefined, diff --git a/vscode/src/chat/chat-view/prompt.test.ts b/vscode/src/chat/chat-view/prompt.test.ts index e036f156e02f..d87087d1db20 100644 --- a/vscode/src/chat/chat-view/prompt.test.ts +++ b/vscode/src/chat/chat-view/prompt.test.ts @@ -1,5 +1,6 @@ import { AUTH_STATUS_FIXTURE_AUTHED, + CHAT_PREAMBLE, CLIENT_CAPABILITIES_FIXTURE, type ContextItem, ContextItemSource, @@ -83,8 +84,7 @@ describe('DefaultPrompter', () => { [ { "speaker": "human", - "text": "You are Cody, an AI coding assistant from Sourcegraph.If your answer contains fenced code blocks in Markdown, include the relevant full file path in the code block tag using this structure: \`\`\`$LANGUAGE:$FILEPATH\`\`\` - For executable terminal commands: enclose each command in individual "bash" language code block without comments and new lines inside.", + "text": "${CHAT_PREAMBLE}", }, { "speaker": "assistant", @@ -164,10 +164,7 @@ describe('DefaultPrompter', () => { [ { "speaker": "human", - "text": "You are Cody, an AI coding assistant from Sourcegraph.If your answer contains fenced code blocks in Markdown, include the relevant full file path in the code block tag using this structure: \`\`\`$LANGUAGE:$FILEPATH\`\`\` - For executable terminal commands: enclose each command in individual "bash" language code block without comments and new lines inside. - - Always respond with 🧀 emojis", + "text": "${CHAT_PREAMBLE} Always respond with 🧀 emojis", }, { "speaker": "assistant", From 6d7e708b66776ac13306c890c8c4b8c37b32d449 Mon Sep 17 00:00:00 2001 From: David Ichim Date: Sun, 16 Feb 2025 21:45:14 +0200 Subject: [PATCH 05/13] Use static value of pre-prompt until code is finalized --- vscode/src/chat/chat-view/prompt.test.ts | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/vscode/src/chat/chat-view/prompt.test.ts b/vscode/src/chat/chat-view/prompt.test.ts index d87087d1db20..47ad039b8465 100644 --- a/vscode/src/chat/chat-view/prompt.test.ts +++ b/vscode/src/chat/chat-view/prompt.test.ts @@ -1,6 +1,5 @@ import { AUTH_STATUS_FIXTURE_AUTHED, - CHAT_PREAMBLE, CLIENT_CAPABILITIES_FIXTURE, type ContextItem, ContextItemSource, @@ -84,7 +83,9 @@ describe('DefaultPrompter', () => { [ { "speaker": "human", - "text": "${CHAT_PREAMBLE}", + "text": "You are Cody, an AI coding assistant from Sourcegraph.If your answer contains fenced code blocks in Markdown, include the relevant full file path in the code block tag using this structure: \`\`\`\`$LANGUAGE:$FILEPATH\`\`\`\`. + If you started a fenced code block, ensure you close it with the same number of backticks before the end of your message. + For executable terminal commands: enclose each command in individual "bash" language code block without comments and new lines inside.", }, { "speaker": "assistant", @@ -164,7 +165,11 @@ describe('DefaultPrompter', () => { [ { "speaker": "human", - "text": "${CHAT_PREAMBLE} Always respond with 🧀 emojis", + "text": "You are Cody, an AI coding assistant from Sourcegraph.If your answer contains fenced code blocks in Markdown, include the relevant full file path in the code block tag using this structure: \`\`\`\`$LANGUAGE:$FILEPATH\`\`\`\`. + If you started a fenced code block, ensure you close it with the same number of backticks before the end of your message. + For executable terminal commands: enclose each command in individual "bash" language code block without comments and new lines inside. + + Always respond with 🧀 emojis", }, { "speaker": "assistant", From 67b784f13b13fed53f247815e40a9fee39691f8b Mon Sep 17 00:00:00 2001 From: David Ichim Date: Mon, 17 Feb 2025 10:25:14 +0200 Subject: [PATCH 06/13] fix(prompt): test to replace hardcoded prompt text with CHAT_PREAMBLE constant We need to check for PromptString as that is what prompt contains, not just simple strings --- vscode/src/chat/chat-view/prompt.test.ts | 51 ++++++++++-------------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/vscode/src/chat/chat-view/prompt.test.ts b/vscode/src/chat/chat-view/prompt.test.ts index 47ad039b8465..f56df7149644 100644 --- a/vscode/src/chat/chat-view/prompt.test.ts +++ b/vscode/src/chat/chat-view/prompt.test.ts @@ -1,5 +1,6 @@ import { AUTH_STATUS_FIXTURE_AUTHED, + CHAT_PREAMBLE, CLIENT_CAPABILITIES_FIXTURE, type ContextItem, ContextItemSource, @@ -79,26 +80,22 @@ describe('DefaultPrompter', () => { chat.addHumanMessage({ text: ps`Hello` }) const { prompt, context } = await new DefaultPrompter([], []).makePrompt(chat, 0) - expect(prompt).toMatchInlineSnapshot(` - [ + expect(prompt).toEqual([ { - "speaker": "human", - "text": "You are Cody, an AI coding assistant from Sourcegraph.If your answer contains fenced code blocks in Markdown, include the relevant full file path in the code block tag using this structure: \`\`\`\`$LANGUAGE:$FILEPATH\`\`\`\`. - If you started a fenced code block, ensure you close it with the same number of backticks before the end of your message. - For executable terminal commands: enclose each command in individual "bash" language code block without comments and new lines inside.", + speaker: 'human', + text: CHAT_PREAMBLE, }, { - "speaker": "assistant", - "text": "I am Cody, an AI coding assistant from Sourcegraph.", + speaker: 'assistant', + text: ps`I am Cody, an AI coding assistant from Sourcegraph.`, }, { - "contextAlternatives": undefined, - "contextFiles": undefined, - "speaker": "human", - "text": "Hello", + contextAlternatives: undefined, + contextFiles: undefined, + speaker: 'human', + text: ps`Hello`, }, - ] - `) + ]) expect(context.used).toEqual([]) expect(context.ignored).toEqual([]) }) @@ -161,28 +158,22 @@ describe('DefaultPrompter', () => { chat.addHumanMessage({ text: ps`Hello` }) const { prompt, context } = await new DefaultPrompter([], []).makePrompt(chat, 0) - expect(prompt).toMatchInlineSnapshot(` - [ + expect(prompt).toEqual([ { - "speaker": "human", - "text": "You are Cody, an AI coding assistant from Sourcegraph.If your answer contains fenced code blocks in Markdown, include the relevant full file path in the code block tag using this structure: \`\`\`\`$LANGUAGE:$FILEPATH\`\`\`\`. - If you started a fenced code block, ensure you close it with the same number of backticks before the end of your message. - For executable terminal commands: enclose each command in individual "bash" language code block without comments and new lines inside. - - Always respond with 🧀 emojis", + speaker: 'human', + text: ps`${CHAT_PREAMBLE}\n\nAlways respond with 🧀 emojis`, }, { - "speaker": "assistant", - "text": "I am Cody, an AI coding assistant from Sourcegraph.", + speaker: 'assistant', + text: ps`I am Cody, an AI coding assistant from Sourcegraph.`, }, { - "contextAlternatives": undefined, - "contextFiles": undefined, - "speaker": "human", - "text": "Hello", + contextAlternatives: undefined, + contextFiles: undefined, + speaker: 'human', + text: ps`Hello`, }, - ] - `) + ]) expect(context.used).toEqual([]) expect(context.ignored).toEqual([]) }) From 675047dd8c18fa750d20253358df4458bd161f1d Mon Sep 17 00:00:00 2001 From: David Ichim Date: Mon, 17 Feb 2025 23:56:48 +0200 Subject: [PATCH 07/13] fix(preamble): revert SMART_APPLY_PREAMBLE change and export CHAT_PREAMBLE --- lib/shared/src/chat/preamble.ts | 3 +-- lib/shared/src/index.ts | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/shared/src/chat/preamble.ts b/lib/shared/src/chat/preamble.ts index f7c54cef1ba9..6b6d879ab4c0 100644 --- a/lib/shared/src/chat/preamble.ts +++ b/lib/shared/src/chat/preamble.ts @@ -9,8 +9,7 @@ const DEFAULT_PREAMBLE = ps`You are Cody, an AI coding assistant from Sourcegrap * produce code blocks that we can associate executable commands or content with existing file paths. * We want to read these file paths to support applying code directly to files from chat for Smart Apply. */ -const SMART_APPLY_PREAMBLE = ps`If your answer contains fenced code blocks in Markdown, include the relevant full file path in the code block tag using this structure: \`\`\`\`$LANGUAGE:$FILEPATH\`\`\`\`. -If you started a fenced code block, ensure you close it with the same number of backticks before the end of your message. +const SMART_APPLY_PREAMBLE = ps`If your answer contains fenced code blocks in Markdown, include the relevant full file path in the code block tag using this structure: \`\`\`$LANGUAGE:$FILEPATH\`\`\` For executable terminal commands: enclose each command in individual "bash" language code block without comments and new lines inside.` export const CHAT_PREAMBLE = DEFAULT_PREAMBLE.concat(SMART_APPLY_PREAMBLE) diff --git a/lib/shared/src/index.ts b/lib/shared/src/index.ts index c92de173fb75..19571ade5a8d 100644 --- a/lib/shared/src/index.ts +++ b/lib/shared/src/index.ts @@ -54,7 +54,7 @@ export { } from './models/utils' export { BotResponseMultiplexer } from './chat/bot-response-multiplexer' export { ChatClient } from './chat/chat' -export { getSimplePreamble } from './chat/preamble' +export { getSimplePreamble, CHAT_PREAMBLE } from './chat/preamble' export type { SerializedChatInteraction, SerializedChatTranscript, From 7e6bb74eb134ab62f3b7f08ef3d16b221dba80b8 Mon Sep 17 00:00:00 2001 From: David Ichim Date: Mon, 17 Feb 2025 23:58:46 +0200 Subject: [PATCH 08/13] fix(chat): enhance childrenTransform to handle '```markdown' replacement --- .../webviews/components/MarkdownFromCody.tsx | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/vscode/webviews/components/MarkdownFromCody.tsx b/vscode/webviews/components/MarkdownFromCody.tsx index 8796653aa3e2..fb13b1c3c284 100644 --- a/vscode/webviews/components/MarkdownFromCody.tsx +++ b/vscode/webviews/components/MarkdownFromCody.tsx @@ -82,23 +82,29 @@ const URL_PROCESSORS: Partial> = { } /** - * Transforms the children string by wrapping it in backticks if it starts with '```markdown'. + * Transforms the children string by wrapping it in one extra backtick if we find '```markdown'. * This is used to preserve the formatting of Markdown code blocks within the Markdown content. + * Such cases happen when you ask Cody to create a Markdown file or when you load a history chat + * that contains replies for creating Markdown files. * * @param children - The string to transform. * @returns The transformed string. */ const childrenTransform = (children: string): string => { - if (children.startsWith('```markdown')) { - return '`' + children + '`' + if (children.indexOf('```markdown') === -1) { + return children } - return children + children = children.replace('```markdown', '````markdown') + const lastIdx = children.lastIndexOf('```') + + // Replace the last three backticks with four backticks + return children.slice(0, lastIdx) + '````' + children.slice(lastIdx + 3) } -export const MarkdownFromCody: FunctionComponent<{ className?: string; children: string }> = ({ - className, - children, -}) => { +export const MarkdownFromCody: FunctionComponent<{ + className?: string + children: string +}> = ({ className, children }) => { const clientType = useConfig().clientCapabilities.agentIDE const urlTransform = useMemo(() => URL_PROCESSORS[clientType] ?? defaultUrlProcessor, [clientType]) const chatReplyTransformed = childrenTransform(children) From 4dabc79cac425eb1f16a8f1eaeb42bf695fa7bfe Mon Sep 17 00:00:00 2001 From: David Ichim Date: Tue, 18 Feb 2025 00:01:10 +0200 Subject: [PATCH 09/13] fix(response-transformer): restore markdown code block handling and improve text stripping logic - Remove markdown tags from non markdown files - Avoid transformation until message is no longer in progress to avoid expensive mutations --- .../src/edit/output/response-transformer.ts | 56 +++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/vscode/src/edit/output/response-transformer.ts b/vscode/src/edit/output/response-transformer.ts index 6b61d52d625d..a876907f782e 100644 --- a/vscode/src/edit/output/response-transformer.ts +++ b/vscode/src/edit/output/response-transformer.ts @@ -21,14 +21,14 @@ const PROMPT_TOPIC_REGEX = new RegExp( * Regular expressions to identify markdown code blocks, and then strip the start and end delimiters. * Important for compatibility with different chat models, due to most chat models being trained to output Markdown. */ -// const MARKDOWN_CODE_BLOCK_DELIMITER_START = '```(?:\\w+)?' -// const MARKDOWN_CODE_BLOCK_DELIMITER_END = '```' -// const MARKDOWN_CODE_BLOCK_START = new RegExp(`^${MARKDOWN_CODE_BLOCK_DELIMITER_START}`) -// const MARKDOWN_CODE_BLOCK_END = new RegExp(`${MARKDOWN_CODE_BLOCK_DELIMITER_END}$`) -// const MARKDOWN_CODE_BLOCK_REGEX = new RegExp( -// `${MARKDOWN_CODE_BLOCK_DELIMITER_START}\\s*([\\s\\S]*?)\\s*${MARKDOWN_CODE_BLOCK_DELIMITER_END}`, -// 'g' -// ) +const MARKDOWN_CODE_BLOCK_DELIMITER_START = '```(?:\\w+)?' +const MARKDOWN_CODE_BLOCK_DELIMITER_END = '```' +const MARKDOWN_CODE_BLOCK_START = new RegExp(`^${MARKDOWN_CODE_BLOCK_DELIMITER_START}`) +const MARKDOWN_CODE_BLOCK_END = new RegExp(`${MARKDOWN_CODE_BLOCK_DELIMITER_END}$`) +const MARKDOWN_CODE_BLOCK_REGEX = new RegExp( + `${MARKDOWN_CODE_BLOCK_DELIMITER_START}\\s*([\\s\\S]*?)\\s*${MARKDOWN_CODE_BLOCK_DELIMITER_END}`, + 'g' +) const LEADING_SPACES_AND_NEW_LINES = /^\s*\n/ const LEADING_SPACES = /^[ ]+/ @@ -43,27 +43,27 @@ export function responseTransformer( task: FixupTask, isMessageInProgress: boolean ): string { - const strippedText = text - // Strip specific XML tags referenced in the prompt, e.g. - .replaceAll(PROMPT_TOPIC_REGEX, '') - // Strip Markdown syntax for code blocks, e.g. ```typescript. - // .replaceAll(MARKDOWN_CODE_BLOCK_REGEX, block => - // block.replace(MARKDOWN_CODE_BLOCK_START, '').replace(MARKDOWN_CODE_BLOCK_END, '') - // ) - - // Trim leading spaces - // - For `add` insertions, the LLM will attempt to continue the code from the position of the cursor, we handle the `insertionPoint` - // but we should preserve new lines as they may be valuable for spacing - // - For other edits, we already trim the selection to exclude padded whitespace, we only want the start of the incoming text - const trimmedText = - task.intent === 'add' - ? strippedText.replace(LEADING_SPACES, '') - : strippedText.replace(LEADING_SPACES_AND_NEW_LINES, '') + if (!isMessageInProgress) { + let strippedText = text + // Strip specific XML tags referenced in the prompt, e.g. + .replaceAll(PROMPT_TOPIC_REGEX, '') + if (task.document.languageId !== 'markdown') { + strippedText = strippedText.replaceAll(MARKDOWN_CODE_BLOCK_REGEX, block => + block.replace(MARKDOWN_CODE_BLOCK_START, '').replace(MARKDOWN_CODE_BLOCK_END, '') + ) + } - // Strip the response of any remaining HTML entities such as < and > - const decodedText = decode(trimmedText) + // Trim leading spaces + // - For `add` insertions, the LLM will attempt to continue the code from the position of the cursor, we handle the `insertionPoint` + // but we should preserve new lines as they may be valuable for spacing + // - For other edits, we already trim the selection to exclude padded whitespace, we only want the start of the incoming text + const trimmedText = + task.intent === 'add' + ? strippedText.replace(LEADING_SPACES, '') + : strippedText.replace(LEADING_SPACES_AND_NEW_LINES, '') - if (!isMessageInProgress) { + // Strip the response of any remaining HTML entities such as < and > + const decodedText = decode(trimmedText) if (task.mode === 'insert') { // For insertions, we want to always ensure we include a new line at the end of the response // We do not attempt to match indentation, as we don't have any original text to compare to @@ -73,7 +73,7 @@ export function responseTransformer( return formatToMatchOriginal(decodedText, task.original, task.fixupFile.uri) } - return decodedText + return text } function formatToMatchOriginal(incoming: string, original: string, uri: vscode.Uri): string { From 5724868b5360f0f26efe1e8dd2efcaf537870278 Mon Sep 17 00:00:00 2001 From: David Ichim Date: Tue, 18 Feb 2025 08:36:21 +0200 Subject: [PATCH 10/13] Revert premature optimization of responseTransformer since tests assume that we perform the sanitation --- .../src/edit/output/response-transformer.ts | 43 ++++++++++--------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/vscode/src/edit/output/response-transformer.ts b/vscode/src/edit/output/response-transformer.ts index a876907f782e..32f981375d0c 100644 --- a/vscode/src/edit/output/response-transformer.ts +++ b/vscode/src/edit/output/response-transformer.ts @@ -43,27 +43,30 @@ export function responseTransformer( task: FixupTask, isMessageInProgress: boolean ): string { - if (!isMessageInProgress) { - let strippedText = text - // Strip specific XML tags referenced in the prompt, e.g. - .replaceAll(PROMPT_TOPIC_REGEX, '') - if (task.document.languageId !== 'markdown') { - strippedText = strippedText.replaceAll(MARKDOWN_CODE_BLOCK_REGEX, block => - block.replace(MARKDOWN_CODE_BLOCK_START, '').replace(MARKDOWN_CODE_BLOCK_END, '') - ) - } + let strippedText = text + // Strip specific XML tags referenced in the prompt, e.g. + .replaceAll(PROMPT_TOPIC_REGEX, '') + + // Strip Markdown syntax for code blocks, e.g. ```typescript, leaving them for markdown files + if (task.document.languageId !== 'markdown') { + strippedText = strippedText.replaceAll(MARKDOWN_CODE_BLOCK_REGEX, block => + block.replace(MARKDOWN_CODE_BLOCK_START, '').replace(MARKDOWN_CODE_BLOCK_END, '') + ) + } + + // Trim leading spaces + // - For `add` insertions, the LLM will attempt to continue the code from the position of the cursor, we handle the `insertionPoint` + // but we should preserve new lines as they may be valuable for spacing + // - For other edits, we already trim the selection to exclude padded whitespace, we only want the start of the incoming text + const trimmedText = + task.intent === 'add' + ? strippedText.replace(LEADING_SPACES, '') + : strippedText.replace(LEADING_SPACES_AND_NEW_LINES, '') - // Trim leading spaces - // - For `add` insertions, the LLM will attempt to continue the code from the position of the cursor, we handle the `insertionPoint` - // but we should preserve new lines as they may be valuable for spacing - // - For other edits, we already trim the selection to exclude padded whitespace, we only want the start of the incoming text - const trimmedText = - task.intent === 'add' - ? strippedText.replace(LEADING_SPACES, '') - : strippedText.replace(LEADING_SPACES_AND_NEW_LINES, '') + // Strip the response of any remaining HTML entities such as < and > + const decodedText = decode(trimmedText) - // Strip the response of any remaining HTML entities such as < and > - const decodedText = decode(trimmedText) + if (!isMessageInProgress) { if (task.mode === 'insert') { // For insertions, we want to always ensure we include a new line at the end of the response // We do not attempt to match indentation, as we don't have any original text to compare to @@ -73,7 +76,7 @@ export function responseTransformer( return formatToMatchOriginal(decodedText, task.original, task.fixupFile.uri) } - return text + return decodedText } function formatToMatchOriginal(incoming: string, original: string, uri: vscode.Uri): string { From e77c63bf5e84123a7694fc1397fe723877b49007 Mon Sep 17 00:00:00 2001 From: David Ichim Date: Tue, 18 Feb 2025 09:13:47 +0200 Subject: [PATCH 11/13] fix(response-transformer): add null checks for task and restore markdown test cases --- .../src/edit/output/response-transformer.ts | 2 +- vscode/src/edit/output/test-fixtures.ts | 40 +++++++++---------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/vscode/src/edit/output/response-transformer.ts b/vscode/src/edit/output/response-transformer.ts index 32f981375d0c..f7530377c3cf 100644 --- a/vscode/src/edit/output/response-transformer.ts +++ b/vscode/src/edit/output/response-transformer.ts @@ -48,7 +48,7 @@ export function responseTransformer( .replaceAll(PROMPT_TOPIC_REGEX, '') // Strip Markdown syntax for code blocks, e.g. ```typescript, leaving them for markdown files - if (task.document.languageId !== 'markdown') { + if (task?.document?.languageId !== 'markdown') { strippedText = strippedText.replaceAll(MARKDOWN_CODE_BLOCK_REGEX, block => block.replace(MARKDOWN_CODE_BLOCK_START, '').replace(MARKDOWN_CODE_BLOCK_END, '') ) diff --git a/vscode/src/edit/output/test-fixtures.ts b/vscode/src/edit/output/test-fixtures.ts index 4165f3dfeafc..6c61ab109ffe 100644 --- a/vscode/src/edit/output/test-fixtures.ts +++ b/vscode/src/edit/output/test-fixtures.ts @@ -29,26 +29,26 @@ export const RESPONSE_TEST_FIXTURES: Record = { expected: CLEAN_RESPONSE, task: DEFAULT_TASK, }, - // withMarkdownSyntax: { - // response: '```\n' + CLEAN_RESPONSE + '```', - // expected: CLEAN_RESPONSE, - // task: DEFAULT_TASK, - // }, - // withMarkdownSyntaxAndLang: { - // response: '```typescript\n' + CLEAN_RESPONSE + '```', - // expected: CLEAN_RESPONSE, - // task: DEFAULT_TASK, - // }, - // withMarkdownSyntaxAndTags: { - // response: - // '```\n' + - // `<${PROMPT_TOPICS.OUTPUT}>` + - // CLEAN_RESPONSE + - // `` + - // '```', - // expected: CLEAN_RESPONSE, - // task: DEFAULT_TASK, - // }, + withMarkdownSyntax: { + response: '```\n' + CLEAN_RESPONSE + '```', + expected: CLEAN_RESPONSE, + task: DEFAULT_TASK, + }, + withMarkdownSyntaxAndLang: { + response: '```typescript\n' + CLEAN_RESPONSE + '```', + expected: CLEAN_RESPONSE, + task: DEFAULT_TASK, + }, + withMarkdownSyntaxAndTags: { + response: + '```\n' + + `<${PROMPT_TOPICS.OUTPUT}>` + + CLEAN_RESPONSE + + `` + + '```', + expected: CLEAN_RESPONSE, + task: DEFAULT_TASK, + }, withHtmlEntities: { response: CLEAN_RESPONSE.replace(//g, '>'), expected: CLEAN_RESPONSE, From 54963f52e83f3d9f6d477b2144725f2582969911 Mon Sep 17 00:00:00 2001 From: Tom Ross Date: Wed, 19 Feb 2025 10:02:46 +0000 Subject: [PATCH 12/13] Apply code suggestions --- .../src/edit/output/response-transformer.ts | 34 +++++++++++----- vscode/src/edit/output/test-fixtures.ts | 22 ++++++++++- .../components/MarkdownFromCody.test.tsx | 39 +++++++++++++++++++ .../webviews/components/MarkdownFromCody.tsx | 9 +---- 4 files changed, 85 insertions(+), 19 deletions(-) diff --git a/vscode/src/edit/output/response-transformer.ts b/vscode/src/edit/output/response-transformer.ts index f7530377c3cf..c4d6a0ad8d00 100644 --- a/vscode/src/edit/output/response-transformer.ts +++ b/vscode/src/edit/output/response-transformer.ts @@ -33,6 +33,29 @@ const MARKDOWN_CODE_BLOCK_REGEX = new RegExp( const LEADING_SPACES_AND_NEW_LINES = /^\s*\n/ const LEADING_SPACES = /^[ ]+/ +/** + * Strips the text of any unnecessary content. + * This includes: + * 1. Prompt topics, e.g. . These are used by the LLM to wrap the output code. + * 2. Markdown code blocks, e.g. ```typescript. Most LLMs are trained to produce Markdown-suitable responses. + */ +function stripText(text: string, task: FixupTask): string { + const strippedText = text + // Strip specific XML tags referenced in the prompt, e.g. + .replaceAll(PROMPT_TOPIC_REGEX, '') + + if (task.document.languageId === 'markdown') { + // Return this text as is, we do not want to strip Markdown blocks as they may be valuable + // in Markdown files + return strippedText + } + + // Strip Markdown syntax for code blocks, e.g. ```typescript. + return strippedText.replaceAll(MARKDOWN_CODE_BLOCK_REGEX, block => + block.replace(MARKDOWN_CODE_BLOCK_START, '').replace(MARKDOWN_CODE_BLOCK_END, '') + ) +} + /** * Given the LLM response for a FixupTask, transforms the response * to make it suitable to insert as code. @@ -43,16 +66,7 @@ export function responseTransformer( task: FixupTask, isMessageInProgress: boolean ): string { - let strippedText = text - // Strip specific XML tags referenced in the prompt, e.g. - .replaceAll(PROMPT_TOPIC_REGEX, '') - - // Strip Markdown syntax for code blocks, e.g. ```typescript, leaving them for markdown files - if (task?.document?.languageId !== 'markdown') { - strippedText = strippedText.replaceAll(MARKDOWN_CODE_BLOCK_REGEX, block => - block.replace(MARKDOWN_CODE_BLOCK_START, '').replace(MARKDOWN_CODE_BLOCK_END, '') - ) - } + const strippedText = stripText(text, task) // Trim leading spaces // - For `add` insertions, the LLM will attempt to continue the code from the position of the cursor, we handle the `insertionPoint` diff --git a/vscode/src/edit/output/test-fixtures.ts b/vscode/src/edit/output/test-fixtures.ts index 6c61ab109ffe..8b55d190e5ea 100644 --- a/vscode/src/edit/output/test-fixtures.ts +++ b/vscode/src/edit/output/test-fixtures.ts @@ -11,7 +11,21 @@ const CLEAN_RESPONSE = `export function logDebug(filterLabel: log('error', filterLabel, text, ...args) }` -const DEFAULT_TASK = {} as FixupTask +const CLEAN_MARKDOWN_RESPONSE = `## Example heading + +Some text + +\`\`\`typescript +${CLEAN_RESPONSE} +\`\`\` + +Some more text + +\`\`\typescript +${CLEAN_RESPONSE} +\`\`\`` + +const DEFAULT_TASK = { document: { languageId: 'typescript' } } as FixupTask export const RESPONSE_TEST_FIXTURES: Record = { clean: { @@ -65,4 +79,10 @@ export const RESPONSE_TEST_FIXTURES: Record = { expected: '\n\n' + CLEAN_RESPONSE, task: { ...DEFAULT_TASK, intent: 'add' } as FixupTask, }, + inMarkdownFile: { + response: CLEAN_MARKDOWN_RESPONSE, + // Markdown files should not strip Markdown code blocks + expected: CLEAN_MARKDOWN_RESPONSE, + task: { ...DEFAULT_TASK, document: { languageId: 'markdown' } } as FixupTask, + }, } diff --git a/vscode/webviews/components/MarkdownFromCody.test.tsx b/vscode/webviews/components/MarkdownFromCody.test.tsx index 9038795a27c0..62b2d9d4466e 100644 --- a/vscode/webviews/components/MarkdownFromCody.test.tsx +++ b/vscode/webviews/components/MarkdownFromCody.test.tsx @@ -44,6 +44,8 @@ const complicatedMarkdown = [ 'Escaped \\* markdown and escaped html code &lt;', ].join('\n') +const complicatedMarkdownCodeBlock = '```markdown' + complicatedMarkdown + '```' + describe('MarkdownFromCody', () => { it('renders code blocks, with syntax highlighting', () => { expect(render(complicatedMarkdown)).toMatchInlineSnapshot(` @@ -72,6 +74,43 @@ describe('MarkdownFromCody', () => { `) // TODO(sqs): Not sure why the 'inline html' B tags aren't passing through. }) + it('renders Markdown code blocks, with child code blocks preserved', () => { + expect(render(complicatedMarkdownCodeBlock)).toMatchInlineSnapshot(` + "

+          ## This is a subheading
+
+          Some text
+          in the same paragraph
+          with a [link](./destination).
+
+          \`\`\`\`ts
+          const someTypeScriptCode = funcCall()
+          \`\`\`
+
+          - bullet list item 1
+          - bullet list item 2
+
+          1. item 1
+             \`\`\`ts
+             const codeInsideTheBulletPoint = "string"
+             \`\`\`
+          1. item 2
+
+          > quoted
+          > text
+
+          | col 1 | col 2 |
+          |-------|-------|
+          | A     | B     |
+
+          ![image alt text](./src.jpg)
+
+          inline html
+
+          Escaped \\* markdown and escaped html code &lt;\`\`\`
+          
" + `) + }) it('sanitizes script tags', () => { expect(render('')).toBe('') }) diff --git a/vscode/webviews/components/MarkdownFromCody.tsx b/vscode/webviews/components/MarkdownFromCody.tsx index 4a8829897717..474de2d8e970 100644 --- a/vscode/webviews/components/MarkdownFromCody.tsx +++ b/vscode/webviews/components/MarkdownFromCody.tsx @@ -92,14 +92,7 @@ const URL_PROCESSORS: Partial> = { * @returns The transformed string. */ const childrenTransform = (children: string): string => { - if (children.indexOf('```markdown') === -1) { - return children - } - children = children.replace('```markdown', '````markdown') - const lastIdx = children.lastIndexOf('```') - - // Replace the last three backticks with four backticks - return children.slice(0, lastIdx) + '````' + children.slice(lastIdx + 3) + return children.replace(/```markdown([\s\S]*?)```/g, '````markdown$1````') } export const MarkdownFromCody: FunctionComponent<{ From 5f9cfa2fa806750663dc7d9a82ba86eab88b0807 Mon Sep 17 00:00:00 2001 From: Tom Ross Date: Wed, 19 Feb 2025 10:38:04 +0000 Subject: [PATCH 13/13] revert --- vscode/webviews/components/MarkdownFromCody.test.tsx | 4 ++-- vscode/webviews/components/MarkdownFromCody.tsx | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/vscode/webviews/components/MarkdownFromCody.test.tsx b/vscode/webviews/components/MarkdownFromCody.test.tsx index 62b2d9d4466e..d5bdb2fb3fdd 100644 --- a/vscode/webviews/components/MarkdownFromCody.test.tsx +++ b/vscode/webviews/components/MarkdownFromCody.test.tsx @@ -83,7 +83,7 @@ describe('MarkdownFromCody', () => { in the same paragraph with a [link](./destination). - \`\`\`\`ts + \`\`\`ts const someTypeScriptCode = funcCall() \`\`\` @@ -107,7 +107,7 @@ describe('MarkdownFromCody', () => { inline html - Escaped \\* markdown and escaped html code &lt;\`\`\` + Escaped \\* markdown and escaped html code &lt;\`\`\`\` " `) }) diff --git a/vscode/webviews/components/MarkdownFromCody.tsx b/vscode/webviews/components/MarkdownFromCody.tsx index 474de2d8e970..4a8829897717 100644 --- a/vscode/webviews/components/MarkdownFromCody.tsx +++ b/vscode/webviews/components/MarkdownFromCody.tsx @@ -92,7 +92,14 @@ const URL_PROCESSORS: Partial> = { * @returns The transformed string. */ const childrenTransform = (children: string): string => { - return children.replace(/```markdown([\s\S]*?)```/g, '````markdown$1````') + if (children.indexOf('```markdown') === -1) { + return children + } + children = children.replace('```markdown', '````markdown') + const lastIdx = children.lastIndexOf('```') + + // Replace the last three backticks with four backticks + return children.slice(0, lastIdx) + '````' + children.slice(lastIdx + 3) } export const MarkdownFromCody: FunctionComponent<{