microsoft
diff --git a/‎.github/workflows/evalprompt.yml
+72 b/‎.github/workflows/evalprompt.yml
+72
diff --git a/‎.github/workflows/scripts/collectPromptRes.js
+42 b/‎.github/workflows/scripts/collectPromptRes.js
+42
diff --git a/‎.vscode/extensions.json
+5-1 b/‎.vscode/extensions.json
+5-1
diff --git a/‎.vscode/tasks.json
+49 b/‎.vscode/tasks.json
+49
diff --git a/‎PRERELEASE.md
+4-1 b/‎PRERELEASE.md
+4-1
diff --git a/‎README.md
+4 b/‎README.md
+4
diff --git a/‎evalprompt/README.md
+89 b/‎evalprompt/README.md
+89
diff --git a/‎evalprompt/example/README.md
+14 b/‎evalprompt/example/README.md
+14
diff --git a/‎evalprompt/example/customProvider.ts
+45 b/‎evalprompt/example/customProvider.ts
+45
diff --git a/‎evalprompt/example/promptfooconfig.yaml
+22 b/‎evalprompt/example/promptfooconfig.yaml
+22
diff --git a/‎evalprompt/example/prompts.txt
+1 b/‎evalprompt/example/prompts.txt
+1
diff --git a/‎evalprompt/example/vars.csv
+3 b/‎evalprompt/example/vars.csv
+3
@@ -0,0 +1,72 @@
+name: evaluate prompt
+on:
+  pull_request:
+    paths:
+      - "evalprompt/**"
+      - "src/prompts/**"
+    branches:
+      - main
+      - dev
+
+jobs:
+  prompt-evaluation:
+    runs-on: ubuntu-latest
+    environment: engineering
+    permissions:
+      contents: read # Required for actions/checkout
+      pull-requests: write # Ability to post comments on Pull Requests
+      id-token: write # Required for Azure login
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-node@v3
+        with:
+          node-version: lts/*
+      - name: Setup project
+        run: |
+          npm install
+      - name: Azure login
+        uses: azure/login@v2
+        with:
+          client-id: ${{ secrets.EVALUATION_TEST_CLIENT_ID }}
+          tenant-id: ${{ secrets.EVALUATION_TEST_TENANT_ID }}
+          subscription-id: ${{ secrets.EVALUATION_TEST_SUB_ID }}
+          enable-AzPSSession: true
+      - name: Set up promptfoo cache
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/promptfoo
+          key: ${{ runner.os }}-promptfoo-v1
+          restore-keys: |
+            ${{ runner.os }}-promptfoo-
+
+      - name: Prompt Evaluation
+        continue-on-error: true
+        env:
+          PROMPTFOO_CACHE_PATH: ~/.cache/promptfoo
+        run: |
+          for dir in evalprompt/*/; do
+            if [ -d "$dir" ] && [ "$(basename "$dir")" != "example" ]; then
+              cd "$dir" || continue
+              npx promptfoo eval -o output.json --share || true
+              cd - > /dev/null || exit
+            fi
+          done
+
+      - name: generate comments doc
+        run: |
+          node .github/workflows/scripts/collectPromptRes.js
+          cat ./commentContent.txt
+
+      - uses: actions/github-script@v7
+        with:
+          github-token: ${{secrets.GITHUB_TOKEN}}
+          script: |
+            const fs = require('fs');
+            const content = fs.readFileSync('./commentContent.txt', 'utf8');
+            github.rest.issues.createComment({
+              issue_number: context.issue.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body: content
+            })
+
@@ -0,0 +1,42 @@
+const path = require('path');
+const fs = require('fs');
+const rootPath = path.join(__dirname, "..", "..", "..");
+const evalPromptDir = path.join(rootPath, "evalprompt");
+// collect results
+let commentContent = '';
+const directories = fs.readdirSync(evalPromptDir, { withFileTypes: true })
+    .filter(dirent => dirent.isDirectory())
+    .map(dirent => dirent.name);
+console.log(directories);
+for (const dir of directories) {
+    const outputFile = path.join(evalPromptDir, dir, 'output.json');
+    const baselineFile = path.join(evalPromptDir, dir, 'baseline.json');
+    if (outputFile == null || !fs.existsSync(outputFile))
+        continue;
+    console.log(outputFile);
+    const output = JSON.parse(fs.readFileSync(outputFile, 'utf8'));
+    let body = ` LLM prompt result for ${dir}
+
+| From | Success | Failure |  Score  |
+|---------|---------|---------|---------|
+| Output | ${output.results.stats.successes} | ${output.results.stats.failures} | ${output.results.prompts[0].metrics.score} |
+`;
+    if (baselineFile && fs.existsSync(baselineFile)) {
+        const baseline = JSON.parse(fs.readFileSync(baselineFile, 'utf8'));
+        body += `|Baseline| ${baseline.success} | ${baseline.failure} | ${baseline.score} |
+        `
+        if (baseline.successes < output.results.stats.successes || baseline.failures > output.results.stats.failures || baseline.score > output.results.prompts[0].metrics.score) {
+            body = `[Not PASS]🚨 ` + body;
+        } else {
+            body = `[PASS]✅ ` + body;
+        }
+    }
+    if (output.shareableUrl) {
+        body = body.concat(`\n**» [View eval results](${output.shareableUrl}) «**\n`);
+    } else {
+        body = body.concat('\n**» View eval results in CI console «**\n');
+    }
+    commentContent += body + "\n\n";
+}
+
+fs.writeFileSync(path.join(rootPath, 'commentContent.txt'), commentContent, 'utf-8');
@@ -1,5 +1,9 @@
 {
   // See http://go.microsoft.com/fwlink/?LinkId=827846
   // for the documentation about the extensions.json format
-  "recommendations": ["dbaeumer.vscode-eslint", "amodio.tsl-problem-matcher"]
+  "recommendations": [
+    "dbaeumer.vscode-eslint",
+    "amodio.tsl-problem-matcher",
+    "rioj7.command-variable"
+  ]
 }
@@ -35,6 +35,55 @@
 				"npm: watch-tests"
 			],
 			"problemMatcher": []
+		},
+		{
+			"label": "[hidden]promptfoo eval",
+			"type": "shell",
+			"command": "npx",
+			"hide": true,
+			"args": [
+				"promptfoo",
+				"eval"
+			],
+			"options": {
+				"cwd": "${input:selectDir}"
+			},
+			"problemMatcher": []
+		},
+		{
+			"label": "evaluate prompt",
+			"type": "shell",
+			"command": "npx",
+			"args": [
+				"promptfoo",
+				"view",
+				"-y"
+			],
+			"options": {
+				"cwd": "${input:selectDir}"
+			},
+			"dependsOn": [
+				"[hidden]promptfoo eval"
+			],
+			"problemMatcher": []
+		}
+	],
+	"inputs": [
+		{
+			"id": "selectDir",
+			"type": "command",
+			"command": "extension.commandvariable.file.pickFile",
+			"args": {
+				"description": "Select Project Folder to Evaluate Prompt",
+				"include": "evalprompt/*/promptfooconfig.yaml",
+				"showDirs": false,
+				"transform": {
+					"text": "${fileDirname}",
+					"find": "\\\\",
+					"replace": "/",
+					"flags": "g"
+				}
+			}
 		}
 	]
 }
@@ -3,7 +3,10 @@
 
 > Note: This changelog only includes the changes for the pre-release versions of Azure API Center for Visual Studio Code. For the changelog of stable versions, please refer to the [Changelog of Azure API Center for Visual Studio Code](https://github.com/microsoft/vscode-azureapicenter/blob/main/CHANGELOG.md).
 
-### January 15, 2024
+### February 27, 2025
+* Integrate with the `Teams Toolkit` extension to empower developers to create M365 Copilot Declarative Agents using API definitions from API Center.
+
+### January 15, 2025
 * Add support for API Analysis Profiles
 * Add 'None' option to unselect active style guide
 
 
@@ -63,6 +63,10 @@ Export API specification document for a specific API either from the tree view o
 
 ![Export API specification](./media/export-api.gif)
 
+Create M365 Copilot Declarative Agent
+
+![Create M365 Copilot Declarative Agent](./media/create-declarative-agent.png)
+
 ## Pre-Release
 
 For the features with `[Pre-Release]` flag, you need to install pre-release version of the extension.
 
@@ -0,0 +1,89 @@
+# Evaluation Prompt Guide
+
+## Local Debug
+
+### In VSCode
+1. install [Command Variable](https://marketplace.visualstudio.com/items?itemName=rioj7.command-variable)
+1. `F1` -> `Tasks: Run Task` -> `evaluate prompt`
+1. select the target folder
+
+### Terminal
+1. Switch to target folder
+1. run `npx promptfoo eval`
+1. run `npx promptfoo view -y`
+
+## GitHub CI Pipeline
+When a pull request targeting `main` or `dev` changes files under `evalprompt/` or `src/prompts/`, this pipeline is triggered.
+The GitHub bot then comments on the PR with the prompt evaluation results. You can click the hyperlink in the results to view more detailed content.
+
+## Case Guide
+### Prompts
+- prompt from raw text
+```yaml
+prompts:
+  - 'Translate the following text to French: "{{name}}: {{text}}"'
+  - 'Translate the following text to German: "{{name}}: {{text}}"'
+```
+
+- prompts as file
+```yaml
+prompts:
+  - file://path/to/prompts.json
+  - file://path/to/prompts.txt
+  - file://path/to/prompts.js
+  - file://path/to/prompts.py
+```
+
+### Provider
+- built-in provider
+```yaml
+providers:
+  - id: azure:chat:gpt-4o
+    config:
+      apiHost: "apicevaluationtestai.openai.azure.com"
+```
+
+- custom provider
+```yaml
+providers:
+  - id: file://customProvider.ts
+    label: "My custom provider"
+```
+
+### Asserts (or Evaluation)
+- Default evaluation for all tests
+```yaml
+defaultTest:
+  assert:
+    - type: javascript
+      value: file://spectral.mjs
+```
+
+- Evaluation for each test
+```yaml
+tests:
+  - description: 'Test that the output is cheap and fast'
+    vars:
+      example: 'Hello, World!'
+    assert:
+      - type: assert-set
+        assert:
+          - type: cost
+            threshold: 0.001
+          - type: latency
+            threshold: 200
+```
+
+### Tests (User inputs)
+- test from raw file
+```yaml
+tests:
+  - file://relative/path/to/normal_test.yaml
+```
+
+- tests from script (javascript, python, go or others)
+```yaml
+tests:
+  - file://path/to/tests.js
+```
+
@@ -0,0 +1,14 @@
+This example uses a custom API provider in `customProvider.ts`. It also uses CSV test cases.
+
+Create a `.env` file and set `AZURE_OPENAI_ENDPOINT`.
+Run:
+
+```
+promptfoo eval
+```
+
+Full command-line equivalent:
+
+```
+promptfoo eval --prompts prompts.txt --tests vars.csv --providers customProvider.ts --output output.json
+```
@@ -0,0 +1,45 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+import { DefaultAzureCredential, getBearerTokenProvider } from "@azure/identity";
+import "@azure/openai/types";
+import "dotenv/config";
+import { AzureOpenAI } from "openai";
+import type { ApiProvider, ProviderOptions, ProviderResponse } from 'promptfoo';
+
+/**
+ * Example promptfoo provider that forwards each prompt to an Azure OpenAI
+ * chat-completions deployment, authenticating with `DefaultAzureCredential`.
+ */
+export default class CustomApiProvider implements ApiProvider {
+  protected providerId: string;
+  public config: any;
+  /** Client created on first use and reused, so the credential is built once. */
+  private client?: AzureOpenAI;
+
+  /**
+   * @param options Provider options from the promptfoo config; `id` overrides
+   *   the default provider ID and `config` is stored unchanged.
+   */
+  constructor(options: ProviderOptions) {
+    // The caller may override Provider ID (e.g. when using multiple instances of the same provider)
+    this.providerId = options.id || 'custom provider';
+
+    // The config object contains any options passed to the provider in the config file.
+    this.config = options.config;
+  }
+
+  /** @returns The provider ID chosen in the constructor. */
+  id(): string {
+    return this.providerId;
+  }
+
+  /**
+   * Sends `prompt` as a single user message and returns the first choice.
+   *
+   * @param prompt The rendered prompt text.
+   * @returns `{ output }` with the message content, or `{ error }` when the
+   *   response carries no choice or no content.
+   */
+  async callApi(prompt: string): Promise<ProviderResponse> {
+    const result = await this.getClient().chat.completions.create({
+      messages: [
+        { role: "user", content: prompt }
+      ],
+      // NOTE(review): left empty as before; presumably the client's
+      // `deployment` option selects the model — confirm.
+      model: "",
+      max_tokens: 128
+    });
+
+    // Guard against an empty `choices` array or a null message content
+    // instead of throwing or returning a null output.
+    const content = result.choices[0]?.message?.content;
+    if (content == null) {
+      return { error: "Azure OpenAI returned no message content" };
+    }
+    return { output: content };
+  }
+
+  /** Builds the Azure OpenAI client on first call and caches it for later calls. */
+  private getClient(): AzureOpenAI {
+    if (this.client === undefined) {
+      const scope = "https://cognitiveservices.azure.com/.default";
+      const azureADTokenProvider = getBearerTokenProvider(new DefaultAzureCredential(), scope);
+      const deployment = "gpt-4o";
+      const apiVersion = "2024-05-01-preview";
+      this.client = new AzureOpenAI({ azureADTokenProvider, deployment, apiVersion });
+    }
+    return this.client;
+  }
+}
@@ -0,0 +1,22 @@
+# yaml-language-server: $schema=https://promptfoo.dev/config-schema.json
+prompts:
+  - file://prompts.txt
+
+providers:
+  - id: file://customProvider.ts
+    label: "My custom provider"
+
+tests:
+  - vars:
+      language: French
+      input: Hello world
+    assert:
+      - type: javascript
+        value: output.toLowerCase().includes('bonjour')
+  - vars:
+      language: German
+      input: How's it going?
+    assert:
+      - type: similar
+        value: was geht
+        threshold: 0.6 # cosine similarity
@@ -0,0 +1 @@
+Rephrase this in {{language}}: {{input}}
@@ -0,0 +1,3 @@
+body
+Hello world
+I'm hungry
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,9 @@`
`1`	`1`	`{`
`2`	`2`	`// See http://go.microsoft.com/fwlink/?LinkId=827846`
`3`	`3`	`// for the documentation about the extensions.json format`
`4`		`- "recommendations": ["dbaeumer.vscode-eslint", "amodio.tsl-problem-matcher"]`
	`4`	`+ "recommendations": [`
	`5`	`+ "dbaeumer.vscode-eslint",`
	`6`	`+ "amodio.tsl-problem-matcher",`
	`7`	`+ "rioj7.command-variable"`
	`8`	`+ ]`
`5`	`9`	`}`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Rephrase this in {{language}}: {{input}}`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+body`
	`2`	`+Hello world`
	`3`	`+I'm hungry`