18
18
AZURE_CONTAINER_REPOSITORY_NAME : ${{ steps.config-env.outputs.AZURE_CONTAINER_REPOSITORY_NAME }}
19
19
AZURE_LOCATION : ${{ steps.config-env.outputs.AZURE_LOCATION }}
20
20
AZURE_RESOURCE_GROUP : ${{ steps.config-env.outputs.AZURE_RESOURCE_GROUP }}
21
- AZUREAI_RESOURCE_GROUP : ${{ steps.config-env.outputs.AZUREAI_RESOURCE_GROUP }}
21
+ AZUREAI_RESOURCE_GROUP : ${{ steps.config-env.outputs.AZURE_RESOURCE_GROUP }}
22
+ AZUREAI_PROJECT_NAME : ${{ steps.config-env.outputs.AZUREAI_PROJECT_NAME }}
22
23
AZURE_SUBSCRIPTION_ID : ${{ steps.config-env.outputs.AZURE_SUBSCRIPTION_ID }}
24
+ AZURE_SEARCH_ENDPOINT : ${{ steps.config-env.outputs.AZURE_SEARCH_ENDPOINT }}
25
+ AZURE_OPENAI_CHAT_DEPLOYMENT : ${{ steps.config-env.outputs.AZURE_OPENAI_CHAT_DEPLOYMENT }}
26
+ AZURE_OPENAI_EMBEDDING_DEPLOYMENT : ${{steps.config-env.outputs.AZURE_OPENAI_EMBEDDING_DEPLOYMENT}}
27
+ AZURE_OPENAI_EMBEDDING_MODEL : ${{steps.config-env.outputs.AZURE_OPENAI_EMBEDDING_MODEL}}
28
+ AZURE_OPENAI_API_VERSION : ${{steps.config-env.outputs.AZURE_OPENAI_API_VERSION}}
29
+ AZURE_OPENAI_ENDPOINT : ${{steps.config-env.outputs.AZURE_OPENAI_ENDPOINT}}
30
+ AZURE_OPENAI_NAME : ${{steps.config-env.outputs.AZURE_OPENAI_NAME}}
31
+ PROMPTFLOW_WORKER_NUM : ${{steps.config-env.outputs.PROMPTFLOW_WORKER_NUM}}
32
+ PROMPTFLOW_SERVING_ENGINE : ${{steps.config-env.outputs.PROMPTFLOW_SERVING_ENGINE}}
23
33
steps :
24
34
- uses : actions/checkout@v4
25
35
- name : Provision qa environment
@@ -40,12 +50,13 @@ jobs:
40
50
AZURE_CONTAINER_REPOSITORY_NAME : ${{ vars.AZURE_CONTAINER_REPOSITORY_NAME }}
41
51
AZURE_OPENAI_API_VERSION : ${{ vars.AZURE_OPENAI_API_VERSION }}
42
52
AZURE_OPENAI_CHAT_DEPLOYMENT : ${{ vars.AZURE_OPENAI_CHAT_DEPLOYMENT }}
53
+ AZURE_OPENAI_DEPLOYMENT : ${{ vars.AZURE_OPENAI_CHAT_DEPLOYMENT }}
43
54
AZURE_OPENAI_EMBEDDING_DEPLOYMENT : ${{ vars.AZURE_OPENAI_EMBEDDING_DEPLOYMENT }}
44
55
AZURE_OPENAI_EMBEDDING_MODEL : ${{ vars.AZURE_OPENAI_EMBEDDING_MODEL }}
45
56
AZURE_OPENAI_ENDPOINT : ${{ vars.AZURE_OPENAI_ENDPOINT }}
46
57
AZURE_OPENAI_NAME : ${{ vars.AZURE_OPENAI_NAME }}
47
58
AZURE_RESOURCE_GROUP : ${{ vars.AZURE_RESOURCE_GROUP }}
48
- AZUREAI_RESOURCE_GROUP : ${{ vars.AZUREAI_RESOURCE_GROUP }}
59
+ AZUREAI_RESOURCE_GROUP : ${{ vars.AZURE_RESOURCE_GROUP }}
49
60
AZURE_SEARCH_ENDPOINT : ${{ vars.AZURE_SEARCH_ENDPOINT }}
50
61
AZURE_SEARCH_INDEX_SAMPLE_DATA : ${{ vars.AZURE_SEARCH_INDEX_SAMPLE_DATA }}
51
62
AZURE_SEARCH_NAME : ${{ vars.AZURE_SEARCH_NAME }}
@@ -59,13 +70,91 @@ jobs:
59
70
AZURE_STORAGE_ACCOUNT_NAME : ${{ vars.AZURE_STORAGE_ACCOUNT_NAME }}
60
71
61
72
# Runs the AI-based quality evaluation (evaluations/qa_quality_eval.py) using
# the values published by setup-env-job, then uploads the report as an artifact.
qa-flow-evaluation:
  needs: [setup-env-job]
  runs-on: ubuntu-latest
  environment: qa
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Setup Python
      uses: actions/setup-python@v5
      with:
        # Quoted so YAML never reads a version as a float (e.g. 3.10 -> 3.1).
        python-version: "3.11"

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt

    - name: Login to Azure CLI
      uses: azure/login@v2
      with:
        creds: ${{ secrets.AZURE_CREDENTIALS }}

    - name: AI-Based quality evaluation
      env:
        AZURE_OPENAI_ENDPOINT: ${{ needs.setup-env-job.outputs.AZURE_OPENAI_ENDPOINT }}
        # Same output as AZURE_OPENAI_CHAT_DEPLOYMENT below, exposed under a second name.
        AZURE_OPENAI_DEPLOYMENT: ${{ needs.setup-env-job.outputs.AZURE_OPENAI_CHAT_DEPLOYMENT }}
        AZUREAI_PROJECT_NAME: ${{ needs.setup-env-job.outputs.AZUREAI_PROJECT_NAME }}
        AZURE_OPENAI_API_VERSION: ${{ needs.setup-env-job.outputs.AZURE_OPENAI_API_VERSION }}
        AZURE_SUBSCRIPTION_ID: ${{ needs.setup-env-job.outputs.AZURE_SUBSCRIPTION_ID }}
        AZURE_RESOURCE_GROUP: ${{ needs.setup-env-job.outputs.AZURE_RESOURCE_GROUP }}
        AZURE_OPENAI_NAME: ${{ needs.setup-env-job.outputs.AZURE_OPENAI_NAME }}
        LOCATION: ${{ needs.setup-env-job.outputs.AZURE_LOCATION }}
        AZURE_CONTAINER_REGISTRY_NAME: ${{ needs.setup-env-job.outputs.AZURE_CONTAINER_REGISTRY_NAME }}
        AZURE_CONTAINER_REPOSITORY_NAME: ${{ needs.setup-env-job.outputs.AZURE_CONTAINER_REPOSITORY_NAME }}
        AZURE_SEARCH_ENDPOINT: ${{ needs.setup-env-job.outputs.AZURE_SEARCH_ENDPOINT }}
        AZURE_OPENAI_CHAT_DEPLOYMENT: ${{ needs.setup-env-job.outputs.AZURE_OPENAI_CHAT_DEPLOYMENT }}
        AZURE_OPENAI_EMBEDDING_DEPLOYMENT: ${{ needs.setup-env-job.outputs.AZURE_OPENAI_EMBEDDING_DEPLOYMENT }}
        AZURE_OPENAI_EMBEDDING_MODEL: ${{ needs.setup-env-job.outputs.AZURE_OPENAI_EMBEDDING_MODEL }}
        PROMPTFLOW_WORKER_NUM: ${{ needs.setup-env-job.outputs.PROMPTFLOW_WORKER_NUM }}
        PROMPTFLOW_SERVING_ENGINE: ${{ needs.setup-env-job.outputs.PROMPTFLOW_SERVING_ENGINE }}
        AZURE_APP_SERVICE_NAME: ${{ needs.setup-env-job.outputs.AZURE_APP_SERVICE_NAME }}
        AZURE_APP_SERVICE_PLAN_NAME: ${{ needs.setup-env-job.outputs.AZURE_APP_SERVICE_PLAN_NAME }}
      run: |
        echo "Run AI-Based flow evaluation"
        echo "AZURE_RESOURCE_GROUP=${AZURE_RESOURCE_GROUP}"
        echo "AZURE_OPENAI_NAME=${AZURE_OPENAI_NAME}"
        # Assign first, export afterwards: `export VAR=$(cmd)` reports the exit
        # status of `export`, which would hide a failed key lookup from `bash -e`.
        AZURE_OPENAI_API_KEY="$(az cognitiveservices account keys list \
          --resource-group "${AZURE_RESOURCE_GROUP}" \
          --name "${AZURE_OPENAI_NAME}" \
          --query "key1" --output tsv)"
        # Keep the key out of the job log if anything later echoes it.
        if [ -n "${AZURE_OPENAI_API_KEY}" ]; then
          echo "::add-mask::${AZURE_OPENAI_API_KEY}"
        fi
        export AZURE_OPENAI_API_KEY
        # python evaluations/flow_eval.py
        python evaluations/qa_quality_eval.py
      shell: bash

    - name: Upload evaluation results
      uses: actions/upload-artifact@v4
      with:
        name: flow-eval-results
        # NOTE(review): the previous value was "qa_flow_quality_eval.json.json",
        # which looks like a doubled extension. The glob matches both spellings;
        # pin it to the exact file name once the script's output path is confirmed.
        path: qa_flow_quality_eval*.json
129
+
130
# Runs the automated adversarial (safety) evaluation and uploads both result
# files as separate artifacts.
qa-safety-evaluation:
  needs: setup-env-job
  runs-on: ubuntu-latest
  environment: qa
  steps:
    # Every job starts on a fresh runner, so the repository must be checked out
    # and Python prepared here before evaluations/safety_eval.py can be run.
    - name: Checkout
      uses: actions/checkout@v4

    - name: Setup Python
      uses: actions/setup-python@v5
      with:
        # Quoted so YAML never reads a version as a float (e.g. 3.10 -> 3.1).
        python-version: "3.11"

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt

    # NOTE(review): mirrors the qa-flow-evaluation job; presumably the safety
    # script authenticates to the Azure AI project through this session - confirm.
    - name: Login to Azure CLI
      uses: azure/login@v2
      with:
        creds: ${{ secrets.AZURE_CREDENTIALS }}

    - name: AI-Based safety evaluation
      env:
        AZUREAI_PROJECT_NAME: ${{ needs.setup-env-job.outputs.AZUREAI_PROJECT_NAME }}
        AZURE_SUBSCRIPTION_ID: ${{ needs.setup-env-job.outputs.AZURE_SUBSCRIPTION_ID }}
        # Deliberately fed from the AZURE_RESOURCE_GROUP output.
        AZUREAI_RESOURCE_GROUP: ${{ needs.setup-env-job.outputs.AZURE_RESOURCE_GROUP }}
        AZURE_LOCATION: ${{ needs.setup-env-job.outputs.AZURE_LOCATION }}
        PREFIX: ${{ github.sha }}
      run: |
        echo "Run Automated Adversarial Testing"
        export PYTHONPATH=./src:$PYTHONPATH
        python evaluations/safety_eval.py

    # Distinct step names so the two uploads can be told apart in the run log.
    - name: Upload adversarial test results
      uses: actions/upload-artifact@v4
      with:
        name: adversarial-test
        path: adversarial_test.json

    - name: Upload adversarial test results (with jailbreak)
      uses: actions/upload-artifact@v4
      with:
        name: adversarial-test-w-jailbreak
        path: adversarial_test_w_jailbreak.json
69
158
70
159
integration-testing :
71
160
needs : [setup-env-job, qa-flow-evaluation]
0 commit comments