Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(smart-apply): stripping of markdown code blocks #7105

Merged
merged 14 commits into from
Feb 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions lib/shared/src/chat/preamble.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ For executable terminal commands: enclose each command in individual "bash" lang

const CHAT_PREAMBLE = DEFAULT_PREAMBLE.concat(SMART_APPLY_PREAMBLE)

export function getChatPreamble(): PromptString {
return CHAT_PREAMBLE
}

export function getSimplePreamble(
model: ChatModel | EditModel | undefined,
apiVersion: number,
Expand Down
2 changes: 1 addition & 1 deletion lib/shared/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ export {
} from './models/utils'
export { BotResponseMultiplexer } from './chat/bot-response-multiplexer'
export { ChatClient } from './chat/chat'
export { getDefaultSystemPrompt, getSimplePreamble } from './chat/preamble'
export { getDefaultSystemPrompt, getChatPreamble, getSimplePreamble } from './chat/preamble'
export type {
SerializedChatInteraction,
SerializedChatTranscript,
Expand Down
49 changes: 21 additions & 28 deletions vscode/src/chat/chat-view/prompt.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
type ModelsData,
contextFiltersProvider,
createModel,
getChatPreamble,
graphqlClient,
mockAuthStatus,
mockClientCapabilities,
Expand Down Expand Up @@ -79,25 +80,22 @@ describe('DefaultPrompter', () => {
chat.addHumanMessage({ text: ps`Hello` })

const { prompt, context } = await new DefaultPrompter([], []).makePrompt(chat, 0)
expect(prompt).toMatchInlineSnapshot(`
[
expect(prompt).toEqual([
{
"speaker": "human",
"text": "You are Cody, an AI coding assistant from Sourcegraph.If your answer contains fenced code blocks in Markdown, include the relevant full file path in the code block tag using this structure: \`\`\`$LANGUAGE:$FILEPATH\`\`\`
For executable terminal commands: enclose each command in individual "bash" language code block without comments and new lines inside.",
speaker: 'human',
text: getChatPreamble(),
},
{
"speaker": "assistant",
"text": "I am Cody, an AI coding assistant from Sourcegraph.",
speaker: 'assistant',
text: ps`I am Cody, an AI coding assistant from Sourcegraph.`,
},
{
"contextAlternatives": undefined,
"contextFiles": undefined,
"speaker": "human",
"text": "Hello",
contextAlternatives: undefined,
contextFiles: undefined,
speaker: 'human',
text: ps`Hello`,
},
]
`)
])
expect(context.used).toEqual([])
expect(context.ignored).toEqual([])
})
Expand Down Expand Up @@ -160,27 +158,22 @@ describe('DefaultPrompter', () => {
chat.addHumanMessage({ text: ps`Hello` })

const { prompt, context } = await new DefaultPrompter([], []).makePrompt(chat, 0)
expect(prompt).toMatchInlineSnapshot(`
[
expect(prompt).toEqual([
{
"speaker": "human",
"text": "You are Cody, an AI coding assistant from Sourcegraph.If your answer contains fenced code blocks in Markdown, include the relevant full file path in the code block tag using this structure: \`\`\`$LANGUAGE:$FILEPATH\`\`\`
For executable terminal commands: enclose each command in individual "bash" language code block without comments and new lines inside.

Always respond with 🧀 emojis",
speaker: 'human',
text: ps`${getChatPreamble()}\n\nAlways respond with 🧀 emojis`,
},
{
"speaker": "assistant",
"text": "I am Cody, an AI coding assistant from Sourcegraph.",
speaker: 'assistant',
text: ps`I am Cody, an AI coding assistant from Sourcegraph.`,
},
{
"contextAlternatives": undefined,
"contextFiles": undefined,
"speaker": "human",
"text": "Hello",
contextAlternatives: undefined,
contextFiles: undefined,
speaker: 'human',
text: ps`Hello`,
},
]
`)
])
expect(context.used).toEqual([])
expect(context.ignored).toEqual([])
})
Expand Down
31 changes: 24 additions & 7 deletions vscode/src/edit/output/response-transformer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,29 @@ const MARKDOWN_CODE_BLOCK_REGEX = new RegExp(
const LEADING_SPACES_AND_NEW_LINES = /^\s*\n/
const LEADING_SPACES = /^[ ]+/

/**
* Strips the text of any unnecessary content.
* This includes:
* 1. Prompt topics, e.g. <CODE511>. These are used by the LLM to wrap the output code.
* 2. Markdown code blocks, e.g. ```typescript. Most LLMs are trained to produce Markdown-suitable responses.
*/
function stripText(text: string, task: FixupTask): string {
const strippedText = text
// Strip specific XML tags referenced in the prompt, e.g. <CODE511>
.replaceAll(PROMPT_TOPIC_REGEX, '')

if (task.document.languageId === 'markdown') {
// Return this text as is, we do not want to strip Markdown blocks as they may be valuable
// in Markdown files
return strippedText
}

// Strip Markdown syntax for code blocks, e.g. ```typescript.
return strippedText.replaceAll(MARKDOWN_CODE_BLOCK_REGEX, block =>
block.replace(MARKDOWN_CODE_BLOCK_START, '').replace(MARKDOWN_CODE_BLOCK_END, '')
)
}

/**
* Given the LLM response for a FixupTask, transforms the response
* to make it suitable to insert as code.
Expand All @@ -43,13 +66,7 @@ export function responseTransformer(
task: FixupTask,
isMessageInProgress: boolean
): string {
const strippedText = text
// Strip specific XML tags referenced in the prompt, e.g. <CODE511>
.replaceAll(PROMPT_TOPIC_REGEX, '')
// Strip Markdown syntax for code blocks, e.g. ```typescript.
.replaceAll(MARKDOWN_CODE_BLOCK_REGEX, block =>
block.replace(MARKDOWN_CODE_BLOCK_START, '').replace(MARKDOWN_CODE_BLOCK_END, '')
)
const strippedText = stripText(text, task)

// Trim leading spaces
// - For `add` insertions, the LLM will attempt to continue the code from the position of the cursor, we handle the `insertionPoint`
Expand Down
22 changes: 21 additions & 1 deletion vscode/src/edit/output/test-fixtures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,21 @@ const CLEAN_RESPONSE = `export function logDebug<T extends string>(filterLabel:
log<T>('error', filterLabel, text, ...args)
}`

const DEFAULT_TASK = {} as FixupTask
const CLEAN_MARKDOWN_RESPONSE = `## Example heading

Some text

\`\`\`typescript
${CLEAN_RESPONSE}
\`\`\`

Some more text

\`\`\typescript
${CLEAN_RESPONSE}
\`\`\``

const DEFAULT_TASK = { document: { languageId: 'typescript' } } as FixupTask

export const RESPONSE_TEST_FIXTURES: Record<string, ResponseTestFixture> = {
clean: {
Expand Down Expand Up @@ -65,4 +79,10 @@ export const RESPONSE_TEST_FIXTURES: Record<string, ResponseTestFixture> = {
expected: '\n\n' + CLEAN_RESPONSE,
task: { ...DEFAULT_TASK, intent: 'add' } as FixupTask,
},
inMarkdownFile: {
response: CLEAN_MARKDOWN_RESPONSE,
// Markdown files should not strip Markdown code blocks
expected: CLEAN_MARKDOWN_RESPONSE,
task: { ...DEFAULT_TASK, document: { languageId: 'markdown' } } as FixupTask,
},
}
39 changes: 39 additions & 0 deletions vscode/webviews/components/MarkdownFromCody.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ const complicatedMarkdown = [
'Escaped \\* markdown and escaped html code &amp;lt;',
].join('\n')

const complicatedMarkdownCodeBlock = '```markdown' + complicatedMarkdown + '```'

describe('MarkdownFromCody', () => {
it('renders code blocks, with syntax highlighting', () => {
expect(render(complicatedMarkdown)).toMatchInlineSnapshot(`
Expand Down Expand Up @@ -72,6 +74,43 @@ describe('MarkdownFromCody', () => {
`)
// TODO(sqs): Not sure why the '<b>inline html</b>' B tags aren't passing through.
})
it('renders Markdown code blocks, with child code blocks preserved', () => {
expect(render(complicatedMarkdownCodeBlock)).toMatchInlineSnapshot(`
"<pre><code class="hljs language-markdown#">
## This is a subheading

Some text
in the same paragraph
with a [link](./destination).

\`\`\`ts
const someTypeScriptCode = funcCall()
\`\`\`

- bullet list item 1
- bullet list item 2

1. item 1
\`\`\`ts
const codeInsideTheBulletPoint = "string"
\`\`\`
1. item 2

> quoted
> text

| col 1 | col 2 |
|-------|-------|
| A | B |

![image alt text](./src.jpg)

<b>inline html</b>

Escaped \\* markdown and escaped html code &amp;lt;\`\`\`\`
</code></pre>"
`)
})
it('sanitizes script tags', () => {
expect(render('<script>evil();</script>')).toBe('')
})
Expand Down
31 changes: 26 additions & 5 deletions vscode/webviews/components/MarkdownFromCody.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -82,16 +82,37 @@ const URL_PROCESSORS: Partial<Record<CodyIDE, UrlTransform>> = {
[CodyIDE.VSCode]: wrapLinksWithCodyOpenCommand,
}

export const MarkdownFromCody: FunctionComponent<{ className?: string; children: string }> = ({
className,
children,
}) => {
/**
* Transforms the children string by wrapping it in one extra backtick if we find '```markdown'.
* This is used to preserve the formatting of Markdown code blocks within the Markdown content.
* Such cases happen when you ask Cody to create a Markdown file or when you load a history chat
* that contains replies for creating Markdown files.
*
* @param children - The string to transform.
* @returns The transformed string.
*/
const childrenTransform = (children: string): string => {
if (children.indexOf('```markdown') === -1) {
return children
}
children = children.replace('```markdown', '````markdown')
const lastIdx = children.lastIndexOf('```')

// Replace the last three backticks with four backticks
return children.slice(0, lastIdx) + '````' + children.slice(lastIdx + 3)
}

export const MarkdownFromCody: FunctionComponent<{
className?: string
children: string
}> = ({ className, children }) => {
const clientType = useConfig().clientCapabilities.agentIDE
const urlTransform = useMemo(() => URL_PROCESSORS[clientType] ?? defaultUrlProcessor, [clientType])
const chatReplyTransformed = childrenTransform(children)

return (
<Markdown className={className} {...markdownPluginProps()} urlTransform={urlTransform}>
{children}
{chatReplyTransformed}
</Markdown>
)
}
Expand Down
Loading