sourcegraph · umpox · Feb 19, 2025 · Feb 15, 2025 · Feb 16, 2025 · Feb 16, 2025
diff --git a/lib/shared/src/chat/preamble.ts b/lib/shared/src/chat/preamble.ts
@@ -18,6 +18,10 @@ For executable terminal commands: enclose each command in individual "bash" lang
 
 const CHAT_PREAMBLE = DEFAULT_PREAMBLE.concat(SMART_APPLY_PREAMBLE)
 
+export function getChatPreamble(): PromptString {
+    return CHAT_PREAMBLE
+}
+
 export function getSimplePreamble(
     model: ChatModel | EditModel | undefined,
     apiVersion: number,

diff --git a/lib/shared/src/index.ts b/lib/shared/src/index.ts
@@ -54,7 +54,7 @@ export {
 } from './models/utils'
 export { BotResponseMultiplexer } from './chat/bot-response-multiplexer'
 export { ChatClient } from './chat/chat'
-export { getDefaultSystemPrompt, getSimplePreamble } from './chat/preamble'
+export { getDefaultSystemPrompt, getChatPreamble, getSimplePreamble } from './chat/preamble'
 export type {
     SerializedChatInteraction,
     SerializedChatTranscript,

diff --git a/vscode/src/chat/chat-view/prompt.test.ts b/vscode/src/chat/chat-view/prompt.test.ts
@@ -8,6 +8,7 @@ import {
     type ModelsData,
     contextFiltersProvider,
     createModel,
+    getChatPreamble,
     graphqlClient,
     mockAuthStatus,
     mockClientCapabilities,
@@ -79,25 +80,22 @@ describe('DefaultPrompter', () => {
         chat.addHumanMessage({ text: ps`Hello` })
 
         const { prompt, context } = await new DefaultPrompter([], []).makePrompt(chat, 0)
-        expect(prompt).toMatchInlineSnapshot(`
-          [
+        expect(prompt).toEqual([
             {
-              "speaker": "human",
-              "text": "You are Cody, an AI coding assistant from Sourcegraph.If your answer contains fenced code blocks in Markdown, include the relevant full file path in the code block tag using this structure: \`\`\`$LANGUAGE:$FILEPATH\`\`\`
-          For executable terminal commands: enclose each command in individual "bash" language code block without comments and new lines inside.",
+                speaker: 'human',
+                text: getChatPreamble(),
             },
             {
-              "speaker": "assistant",
-              "text": "I am Cody, an AI coding assistant from Sourcegraph.",
+                speaker: 'assistant',
+                text: ps`I am Cody, an AI coding assistant from Sourcegraph.`,
             },
             {
-              "contextAlternatives": undefined,
-              "contextFiles": undefined,
-              "speaker": "human",
-              "text": "Hello",
+                contextAlternatives: undefined,
+                contextFiles: undefined,
+                speaker: 'human',
+                text: ps`Hello`,
             },
-          ]
-        `)
+        ])
         expect(context.used).toEqual([])
         expect(context.ignored).toEqual([])
     })
@@ -160,27 +158,22 @@ describe('DefaultPrompter', () => {
         chat.addHumanMessage({ text: ps`Hello` })
 
         const { prompt, context } = await new DefaultPrompter([], []).makePrompt(chat, 0)
-        expect(prompt).toMatchInlineSnapshot(`
-          [
+        expect(prompt).toEqual([
             {
-              "speaker": "human",
-              "text": "You are Cody, an AI coding assistant from Sourcegraph.If your answer contains fenced code blocks in Markdown, include the relevant full file path in the code block tag using this structure: \`\`\`$LANGUAGE:$FILEPATH\`\`\`
-          For executable terminal commands: enclose each command in individual "bash" language code block without comments and new lines inside.
-
-          Always respond with 🧀 emojis",
+                speaker: 'human',
+                text: ps`${getChatPreamble()}\n\nAlways respond with 🧀 emojis`,
             },
             {
-              "speaker": "assistant",
-              "text": "I am Cody, an AI coding assistant from Sourcegraph.",
+                speaker: 'assistant',
+                text: ps`I am Cody, an AI coding assistant from Sourcegraph.`,
             },
             {
-              "contextAlternatives": undefined,
-              "contextFiles": undefined,
-              "speaker": "human",
-              "text": "Hello",
+                contextAlternatives: undefined,
+                contextFiles: undefined,
+                speaker: 'human',
+                text: ps`Hello`,
             },
-          ]
-        `)
+        ])
         expect(context.used).toEqual([])
         expect(context.ignored).toEqual([])
     })

diff --git a/vscode/src/edit/output/response-transformer.ts b/vscode/src/edit/output/response-transformer.ts
@@ -33,6 +33,29 @@ const MARKDOWN_CODE_BLOCK_REGEX = new RegExp(
 const LEADING_SPACES_AND_NEW_LINES = /^\s*\n/
 const LEADING_SPACES = /^[ ]+/
 
+/**
+ * Strips the text of any unnecessary content.
+ * This includes:
+ * 1. Prompt topics, e.g. <CODE511>. These are used by the LLM to wrap the output code.
+ * 2. Markdown code blocks, e.g. ```typescript. Most LLMs are trained to produce Markdown-suitable responses.
+ */
+function stripText(text: string, task: FixupTask): string {
+    const strippedText = text
+        // Strip specific XML tags referenced in the prompt, e.g. <CODE511>
+        .replaceAll(PROMPT_TOPIC_REGEX, '')
+
+    if (task.document.languageId === 'markdown') {
+        // Return this text as is, we do not want to strip Markdown blocks as they may be valuable
+        // in Markdown files
+        return strippedText
+    }
+
+    // Strip Markdown syntax for code blocks, e.g. ```typescript.
+    return strippedText.replaceAll(MARKDOWN_CODE_BLOCK_REGEX, block =>
+        block.replace(MARKDOWN_CODE_BLOCK_START, '').replace(MARKDOWN_CODE_BLOCK_END, '')
+    )
+}
+
 /**
  * Given the LLM response for a FixupTask, transforms the response
  * to make it suitable to insert as code.
@@ -43,13 +66,7 @@ export function responseTransformer(
     task: FixupTask,
     isMessageInProgress: boolean
 ): string {
-    const strippedText = text
-        // Strip specific XML tags referenced in the prompt, e.g. <CODE511>
-        .replaceAll(PROMPT_TOPIC_REGEX, '')
-        // Strip Markdown syntax for code blocks, e.g. ```typescript.
-        .replaceAll(MARKDOWN_CODE_BLOCK_REGEX, block =>
-            block.replace(MARKDOWN_CODE_BLOCK_START, '').replace(MARKDOWN_CODE_BLOCK_END, '')
-        )
+    const strippedText = stripText(text, task)
 
     // Trim leading spaces
     // - For `add` insertions, the LLM will attempt to continue the code from the position of the cursor, we handle the `insertionPoint`

diff --git a/vscode/src/edit/output/test-fixtures.ts b/vscode/src/edit/output/test-fixtures.ts
@@ -11,7 +11,21 @@ const CLEAN_RESPONSE = `export function logDebug<T extends string>(filterLabel:
     log<T>('error', filterLabel, text, ...args)
 }`
 
-const DEFAULT_TASK = {} as FixupTask
+const CLEAN_MARKDOWN_RESPONSE = `## Example heading
+
+Some text
+
+\`\`\`typescript
+${CLEAN_RESPONSE}
+\`\`\`
+
+Some more text
+
+\`\`\typescript
+${CLEAN_RESPONSE}
+\`\`\``
+
+const DEFAULT_TASK = { document: { languageId: 'typescript' } } as FixupTask
 
 export const RESPONSE_TEST_FIXTURES: Record<string, ResponseTestFixture> = {
     clean: {
@@ -65,4 +79,10 @@ export const RESPONSE_TEST_FIXTURES: Record<string, ResponseTestFixture> = {
         expected: '\n\n' + CLEAN_RESPONSE,
         task: { ...DEFAULT_TASK, intent: 'add' } as FixupTask,
     },
+    inMarkdownFile: {
+        response: CLEAN_MARKDOWN_RESPONSE,
+        // Markdown files should not strip Markdown code blocks
+        expected: CLEAN_MARKDOWN_RESPONSE,
+        task: { ...DEFAULT_TASK, document: { languageId: 'markdown' } } as FixupTask,
+    },
 }
diff --git a/vscode/webviews/components/MarkdownFromCody.test.tsx b/vscode/webviews/components/MarkdownFromCody.test.tsx
@@ -44,6 +44,8 @@ const complicatedMarkdown = [
     'Escaped \\* markdown and escaped html code &amp;lt;',
 ].join('\n')
 
+const complicatedMarkdownCodeBlock = '```markdown' + complicatedMarkdown + '```'
+
 describe('MarkdownFromCody', () => {
     it('renders code blocks, with syntax highlighting', () => {
         expect(render(complicatedMarkdown)).toMatchInlineSnapshot(`
@@ -72,6 +74,43 @@ describe('MarkdownFromCody', () => {
         `)
         // TODO(sqs): Not sure why the '<b>inline html</b>' B tags aren't passing through.
     })
+    it('renders Markdown code blocks, with child code blocks preserved', () => {
+        expect(render(complicatedMarkdownCodeBlock)).toMatchInlineSnapshot(`
+          "<pre><code class="hljs language-markdown#">
+          ## This is a subheading
+
+          Some text
+          in the same paragraph
+          with a [link](./destination).
+
+          \`\`\`ts
+          const someTypeScriptCode = funcCall()
+          \`\`\`
+
+          - bullet list item 1
+          - bullet list item 2
+
+          1. item 1
+             \`\`\`ts
+             const codeInsideTheBulletPoint = "string"
+             \`\`\`
+          1. item 2
+
+          > quoted
+          > text
+
+          | col 1 | col 2 |
+          |-------|-------|
+          | A     | B     |
+
+          ![image alt text](./src.jpg)
+
+          <b>inline html</b>
+
+          Escaped \\* markdown and escaped html code &amp;lt;\`\`\`\`
+          </code></pre>"
+        `)
+    })
     it('sanitizes script tags', () => {
         expect(render('<script>evil();</script>')).toBe('')
     })

diff --git a/vscode/webviews/components/MarkdownFromCody.tsx b/vscode/webviews/components/MarkdownFromCody.tsx
@@ -82,16 +82,37 @@ const URL_PROCESSORS: Partial<Record<CodyIDE, UrlTransform>> = {
     [CodyIDE.VSCode]: wrapLinksWithCodyOpenCommand,
 }
 
-export const MarkdownFromCody: FunctionComponent<{ className?: string; children: string }> = ({
-    className,
-    children,
-}) => {
+/**
+ * Transforms the children string by wrapping it in one extra backtick if we find '```markdown'.
+ * This is used to preserve the formatting of Markdown code blocks within the Markdown content.
+ * Such cases happen when you ask Cody to create a Markdown file or when you load a history chat
+ * that contains replies for creating Markdown files.
+ *
+ * @param children - The string to transform.
+ * @returns The transformed string.
+ */
+const childrenTransform = (children: string): string => {
+    if (children.indexOf('```markdown') === -1) {
+        return children
+    }
+    children = children.replace('```markdown', '````markdown')
+    const lastIdx = children.lastIndexOf('```')
+
+    // Replace the last three backticks with four backticks
+    return children.slice(0, lastIdx) + '````' + children.slice(lastIdx + 3)
+}
+
+export const MarkdownFromCody: FunctionComponent<{
+    className?: string
+    children: string
+}> = ({ className, children }) => {
     const clientType = useConfig().clientCapabilities.agentIDE
     const urlTransform = useMemo(() => URL_PROCESSORS[clientType] ?? defaultUrlProcessor, [clientType])
+    const chatReplyTransformed = childrenTransform(children)
 
     return (
         <Markdown className={className} {...markdownPluginProps()} urlTransform={urlTransform}>
-            {children}
+            {chatReplyTransformed}
         </Markdown>
     )
 }