Skip to content

Commit 5dca6d3

Browse files
authored
Merge pull request #328 from microsoft/dev
build: Feb release
2 parents b23608f + 6972216 commit 5dca6d3

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+16465
-6252
lines changed

.github/workflows/evalprompt.yml

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# Runs promptfoo against every evaluation project under evalprompt/ (except
# the "example" project) and posts a summary of the results as a PR comment.
name: evaluate prompt
on:
  pull_request:
    paths:
      - "evalprompt/**"
      - "src/prompts/**"
    branches:
      - main
      - dev

jobs:
  prompt-evaluation:
    runs-on: ubuntu-latest
    environment: engineering
    permissions:
      contents: read # Required for actions/checkout
      pull-requests: write # Ability to post comments on Pull Requests
      id-token: write # Required for Azure login
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-node@v3
        with:
          node-version: lts/*
      - name: Setup project
        run: |
          npm install
      - name: Azure login
        uses: azure/login@v2
        with:
          client-id: ${{ secrets.EVALUATION_TEST_CLIENT_ID }}
          tenant-id: ${{ secrets.EVALUATION_TEST_TENANT_ID }}
          subscription-id: ${{ secrets.EVALUATION_TEST_SUB_ID }}
          enable-AzPSSession: true
      - name: Set up promptfoo cache
        uses: actions/cache@v3
        with:
          path: ~/.cache/promptfoo
          key: ${{ runner.os }}-promptfoo-v1
          restore-keys: |
            ${{ runner.os }}-promptfoo-

      # Evaluation failures are tolerated on purpose: the results (including
      # failures) are reported in the PR comment instead of failing the job.
      - name: Prompt Evaluation
        continue-on-error: true
        env:
          PROMPTFOO_CACHE_PATH: ~/.cache/promptfoo
        run: |
          for dir in evalprompt/*/; do
            if [ -d "$dir" ] && [ "$(basename "$dir")" != "example" ]; then
              # Run in a subshell so the working directory is restored
              # automatically, even if the evaluation fails.
              (cd "$dir" && npx promptfoo eval -o output.json --share) || true
            fi
          done

      - name: generate comments doc
        run: |
          node .github/workflows/scripts/collectPromptRes.js
          cat ./commentContent.txt

      - uses: actions/github-script@v7
        with:
          github-token: ${{secrets.GITHUB_TOKEN}}
          script: |
            const fs = require('fs');
            const content = fs.readFileSync('./commentContent.txt', 'utf8');
            // The API rejects an empty comment body, so skip posting when
            // no evaluation produced any output.
            if (!content.trim()) {
              core.info('No prompt evaluation results to report; skipping comment.');
              return;
            }
            // Await the request so a failure is reported by this step.
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: content
            });
72+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
const path = require('path');
const fs = require('fs');

// Repository root. Assumes this script sits three directories below it
// (the workflow invokes it as .github/workflows/scripts/collectPromptRes.js).
const rootPath = path.join(__dirname, "..", "..", "..");
const evalPromptDir = path.join(rootPath, "evalprompt");

/**
 * Reads and parses a JSON file.
 *
 * @param {string} file Path of the file to read.
 * @returns {any} The parsed value, or undefined when the file does not exist
 *   or does not contain valid JSON.
 */
function readJsonIfPresent(file) {
    if (!fs.existsSync(file)) {
        return undefined;
    }
    try {
        return JSON.parse(fs.readFileSync(file, 'utf8'));
    } catch (err) {
        // One broken file must not prevent the other directories from being reported.
        console.warn(`Skipping ${file}: ${err.message}`);
        return undefined;
    }
}

/** Renders a table cell, showing "n/a" instead of "undefined"/"null" for missing values. */
function cell(value) {
    return value ?? 'n/a';
}

/**
 * Builds the Markdown section reported for one evaluation directory.
 *
 * @param {string} dir Name of the directory under evalprompt/.
 * @param {any} output Parsed content of the directory's output.json.
 * @param {any} baseline Parsed content of baseline.json, if one exists.
 * @returns {string} Markdown text: a result table, an optional PASS/Not PASS
 *   prefix (only when a baseline exists), and a link to the detailed results.
 */
function buildSection(dir, output, baseline) {
    const stats = output.results?.stats ?? {};
    const score = output.results?.prompts?.[0]?.metrics?.score;

    let body = [
        ` LLM prompt result for ${dir}`,
        '',
        '| From | Success | Failure | Score |',
        '|---------|---------|---------|---------|',
        `| Output | ${cell(stats.successes)} | ${cell(stats.failures)} | ${cell(score)} |`,
        ''
    ].join('\n');

    if (baseline != null && typeof baseline === 'object') {
        // Accept both the plural and the singular key spelling so the table
        // and the comparison always read the same baseline values.
        const baseSuccesses = baseline.successes ?? baseline.success;
        const baseFailures = baseline.failures ?? baseline.failure;
        body += `|Baseline| ${cell(baseSuccesses)} | ${cell(baseFailures)} | ${cell(baseline.score)} |\n`;

        // A regression is any metric that got worse than the baseline.
        // Comparisons against a missing value evaluate to false and are ignored.
        const regressed =
            stats.successes < baseSuccesses ||
            stats.failures > baseFailures ||
            score < baseline.score;
        body = (regressed ? `[Not PASS]🚨 ` : `[PASS]✅ `) + body;
    }

    if (output.shareableUrl) {
        body += `\n**» [View eval results](${output.shareableUrl}) «**\n`;
    } else {
        body += '\n**» View eval results in CI console «**\n';
    }
    return body;
}

// Collect results from every directory under evalprompt/ that has an output.json.
let commentContent = '';
const directories = fs.existsSync(evalPromptDir)
    ? fs.readdirSync(evalPromptDir, { withFileTypes: true })
        .filter(dirent => dirent.isDirectory())
        .map(dirent => dirent.name)
    : [];
console.log(directories);

for (const dir of directories) {
    const outputFile = path.join(evalPromptDir, dir, 'output.json');
    const output = readJsonIfPresent(outputFile);
    if (output == null) {
        continue;
    }
    console.log(outputFile);
    const baseline = readJsonIfPresent(path.join(evalPromptDir, dir, 'baseline.json'));
    commentContent += buildSection(dir, output, baseline) + "\n\n";
}

// Always write the file (possibly empty) so later workflow steps can read it.
fs.writeFileSync(path.join(rootPath, 'commentContent.txt'), commentContent, 'utf-8');

.vscode/extensions.json

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
{
22
// See http://go.microsoft.com/fwlink/?LinkId=827846
33
// for the documentation about the extensions.json format
4-
"recommendations": ["dbaeumer.vscode-eslint", "amodio.tsl-problem-matcher"]
4+
"recommendations": [
5+
"dbaeumer.vscode-eslint",
6+
"amodio.tsl-problem-matcher",
7+
"rioj7.command-variable"
8+
]
59
}

.vscode/tasks.json

+49
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,55 @@
3535
"npm: watch-tests"
3636
],
3737
"problemMatcher": []
38+
},
39+
{
40+
"label": "[hidden]promptfoo eval",
41+
"type": "shell",
42+
"command": "npx",
43+
"hide": true,
44+
"args": [
45+
"promptfoo",
46+
"eval"
47+
],
48+
"options": {
49+
"cwd": "${input:selectDir}"
50+
},
51+
"problemMatcher": []
52+
},
53+
{
54+
"label": "evaluate prompt",
55+
"type": "shell",
56+
"command": "npx",
57+
"args": [
58+
"promptfoo",
59+
"view",
60+
"-y"
61+
],
62+
"options": {
63+
"cwd": "${input:selectDir}"
64+
},
65+
"dependsOn": [
66+
"[hidden]promptfoo eval"
67+
],
68+
"problemMatcher": []
69+
}
70+
],
71+
"inputs": [
72+
{
73+
"id": "selectDir",
74+
"type": "command",
75+
"command": "extension.commandvariable.file.pickFile",
76+
"args": {
77+
"description": "Select Project Folder to Evaluate Prompt",
78+
"include": "evalprompt/*/promptfooconfig.yaml",
79+
"showDirs": false,
80+
"transform": {
81+
"text": "${fileDirname}",
82+
"find": "\\\\",
83+
"replace": "/",
84+
"flags": "g"
85+
}
86+
}
3887
}
3988
]
4089
}

PRERELEASE.md

+4-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33

44
> Note: This changelog only includes the changes for the pre-release versions of Azure API Center for Visual Studio Code. For the changelog of stable versions, please refer to the [Changelog of Azure API Center for Visual Studio Code](https://github.com/microsoft/vscode-azureapicenter/blob/main/CHANGELOG.md).
55
6-
### January 15, 2024
6+
### February 27, 2025
7+
* Integrate with the `Teams Toolkit` extension to empower developers to create M365 Copilot Declarative Agents using API definitions from API Center.
8+
9+
### January 15, 2025
710
* Add support for API Analysis Profiles
811
* Add 'None' option to unselect active style guide
912

README.md

+4
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ Export API specification document for a specific API either from the tree view o
6363

6464
![Export API specification](./media/export-api.gif)
6565

66+
Create M365 Copilot Declarative Agent
67+
68+
![Create M365 Copilot Declarative Agent](./media/create-declarative-agent.png)
69+
6670
## Pre-Release
6771

6872
For the features with `[Pre-Release]` flag, you need to install pre-release version of the extension.

evalprompt/README.md

+89
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Evaluation Prompt Guide
2+
3+
## Local Debug
4+
5+
### In VSCode
6+
1. Install [Command Variable](https://marketplace.visualstudio.com/items?itemName=rioj7.command-variable)
7+
1. `F1` -> `Tasks: Run Task` -> `evaluate prompt`
8+
1. Select the target folder
9+
10+
### Terminal
11+
1. Switch to the target folder
12+
1. Run `npx promptfoo eval`
13+
1. Run `npx promptfoo view -y`
14+
15+
## GitHub CI Pipeline
16+
When a PR targeting `main` or `dev` changes files under `evalprompt/` or `src/prompts/`, this pipeline is triggered.
17+
The GitHub bot will comment on the PR with the prompt evaluation results. Click the hyperlink in the comment to view the detailed results.
18+
19+
## Case Guide
20+
### Prompts
21+
- prompt from raw text
22+
```yaml
23+
prompts:
24+
- 'Translate the following text to French: "{{name}}: {{text}}"'
25+
- 'Translate the following text to German: "{{name}}: {{text}}"'
26+
```
27+
28+
- prompts as file
29+
```yaml
30+
prompts:
31+
- file://path/to/prompts.json
32+
- file://path/to/prompts.txt
33+
- file://path/to/prompts.js
34+
- file://path/to/prompts.py
35+
```
36+
37+
### Provider
38+
- built-in provider
39+
```yaml
40+
providers:
41+
- id: azure:chat:gpt-4o
42+
config:
43+
apiHost: "apicevaluationtestai.openai.azure.com"
44+
```
45+
46+
- custom provider
47+
```yaml
48+
providers:
49+
- id: file://customProvider.ts
50+
label: "My custom provider"
51+
```
52+
53+
### Asserts (or Evaluation)
54+
- Default Evaluation for all tests
55+
```yaml
56+
defaultTest:
57+
assert:
58+
- type: javascript
59+
value: file://spectral.mjs
60+
```
61+
62+
- Evaluation for each test
63+
```yaml
64+
tests:
65+
- description: 'Test that the output is cheap and fast'
66+
vars:
67+
example: 'Hello, World!'
68+
assert:
69+
- type: assert-set
70+
assert:
71+
- type: cost
72+
threshold: 0.001
73+
- type: latency
74+
threshold: 200
75+
```
76+
77+
### Tests (User inputs)
78+
- test from raw file
79+
```yaml
80+
tests:
81+
- file://relative/path/to/normal_test.yaml
82+
```
83+
84+
- tests from script (javascript, python, go or others)
85+
```yaml
86+
tests:
87+
- file://path/to/tests.js
88+
```
89+

evalprompt/example/README.md

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
This example uses a custom API provider in `customProvider.ts`. It also uses CSV test cases.
2+
3+
Create a `.env` file and set `AZURE_OPENAI_ENDPOINT`.
4+
Run:
5+
6+
```
7+
promptfoo eval
8+
```
9+
10+
Full command-line equivalent:
11+
12+
```
13+
promptfoo eval --prompts prompts.txt --tests vars.csv --providers customProvider.ts --output output.json
14+
```

evalprompt/example/customProvider.ts

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT license.
3+
import { DefaultAzureCredential, getBearerTokenProvider } from "@azure/identity";
4+
import "@azure/openai/types";
5+
import "dotenv/config";
6+
import { AzureOpenAI } from "openai";
7+
import type { ApiProvider, ProviderOptions, ProviderResponse } from 'promptfoo';
8+
9+
/**
 * promptfoo provider that sends each prompt to an Azure OpenAI chat deployment,
 * authenticating with `DefaultAzureCredential` (no API key).
 *
 * Optional settings read from the provider `config`:
 * - `deployment`: deployment name (default `"gpt-4o"`)
 * - `apiVersion`: API version (default `"2024-05-01-preview"`)
 * - `max_tokens`: completion token limit (default `128`)
 */
export default class CustomApiProvider implements ApiProvider {
  protected providerId: string;
  public config: any;
  /** Lazily created client, reused so the credential is not rebuilt for every prompt. */
  private client?: AzureOpenAI;

  constructor(options: ProviderOptions) {
    // The caller may override Provider ID (e.g. when using multiple instances of the same provider)
    this.providerId = options.id || 'custom provider';

    // The config object contains any options passed to the provider in the config file.
    this.config = options.config;
  }

  /** Returns the identifier of this provider instance. */
  id(): string {
    return this.providerId;
  }

  /**
   * Sends `prompt` as a single user message and returns the first completion.
   *
   * @param prompt The rendered prompt text.
   * @returns `{ output }` with the model's reply, or `{ error }` when the
   *   request fails or the reply has no text content.
   */
  async callApi(prompt: string): Promise<ProviderResponse> {
    const deployment: string = this.config?.deployment ?? "gpt-4o";

    try {
      const client = this.getClient(deployment);
      const result = await client.chat.completions.create({
        messages: [
          { role: "user", content: prompt }
        ],
        // The client is already bound to the deployment; pass the same name
        // here instead of an empty string so the request is self-describing.
        model: deployment,
        max_tokens: this.config?.max_tokens ?? 128
      });

      // `content` can be null (e.g. filtered or empty replies).
      const content = result.choices[0]?.message?.content;
      if (content == null) {
        return { error: "Azure OpenAI returned no message content" };
      }
      return { output: content };
    } catch (err: unknown) {
      // Report the failure through the response so it is attributed to this test case.
      const message = err instanceof Error ? err.message : String(err);
      return { error: `Azure OpenAI request failed: ${message}` };
    }
  }

  /** Creates the Azure OpenAI client on first use and returns the cached instance afterwards. */
  private getClient(deployment: string): AzureOpenAI {
    if (this.client === undefined) {
      const scope = "https://cognitiveservices.azure.com/.default";
      const azureADTokenProvider = getBearerTokenProvider(new DefaultAzureCredential(), scope);
      const apiVersion: string = this.config?.apiVersion ?? "2024-05-01-preview";
      this.client = new AzureOpenAI({ azureADTokenProvider, deployment, apiVersion });
    }
    return this.client;
  }
}
+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# yaml-language-server: $schema=https://promptfoo.dev/config-schema.json
2+
prompts:
3+
- file://prompts.txt
4+
5+
providers:
6+
- id: file://customProvider.ts
7+
label: "My custom provider"
8+
9+
tests:
10+
- vars:
11+
language: French
12+
input: Hello world
13+
assert:
14+
- type: javascript
15+
value: output.toLowerCase().includes('bonjour')
16+
- vars:
17+
language: German
18+
input: How's it going?
19+
assert:
20+
- type: similar
21+
value: was geht
22+
threshold: 0.6 # cosine similarity

evalprompt/example/prompts.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Rephrase this in {{language}}: {{input}}

evalprompt/example/vars.csv

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
body
2+
Hello world
3+
I'm hungry

0 commit comments

Comments
 (0)