diff --git a/.cspell.json b/.cspell.json
index 08eec76f0bf..ca7a17bebfc 100644
--- a/.cspell.json
+++ b/.cspell.json
@@ -50,7 +50,8 @@
".github/actions/**",
".github/pipelines/**",
".github/CODEOWNERS",
- "src/promptflow-evals/tests/**"
+ "src/promptflow-evals/tests/**",
+ "benchmark/promptflow-serve/result-archive/**"
],
"words": [
"aoai",
@@ -240,7 +241,8 @@
"Machinal",
"azureopenaimodelconfiguration",
"openaimodelconfiguration",
- "usecwd"
+ "usecwd",
+ "locustio"
],
"flagWords": [
"Prompt Flow"
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 1b0f2ac6b8c..69ee0d6e7c0 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -4,6 +4,9 @@
"context": ".",
"dockerFile": "Dockerfile",
"runArgs": ["-v", "/var/run/docker.sock:/var/run/docker.sock"],
+ "remoteEnv": {
+ "HOST_PROJECT_PATH": "${localWorkspaceFolder}"
+ },
"customizations": {
"codespaces": {
"openFiles": ["README.md", "examples/README.md"]
diff --git a/.gitignore b/.gitignore
index 957a0f44ed6..9ef59b176df 100644
--- a/.gitignore
+++ b/.gitignore
@@ -197,3 +197,6 @@ src/promptflow-*/promptflow/__init__.py
# Eclipse project files
**/.project
**/.pydevproject
+
+# benchmark results
+benchmark/promptflow-serve/test_runner/locust-results/
\ No newline at end of file
diff --git a/benchmark/promptflow-serve/README.md b/benchmark/promptflow-serve/README.md
new file mode 100644
index 00000000000..153d48f4b60
--- /dev/null
+++ b/benchmark/promptflow-serve/README.md
@@ -0,0 +1,97 @@
+# Introduction
+
+This directory contains scripts to test the throughput scalability of various [PromptFlow](https://microsoft.github.io/promptflow/) flows, using sync/async HTTP calls. It contains:
+- A mock API service ([FastAPI](https://fastapi.tiangolo.com/) + [uvicorn](https://www.uvicorn.org/)) and a Dockerfile to run it as a service;
+- Three different PromptFlow flows which include a node to query the mock API service:
+ - A [FlexFlow](https://microsoft.github.io/promptflow/tutorials/flex-flow-quickstart.html)-based flow with an async call to the mock API service;
+  - Two [static DAG](https://microsoft.github.io/promptflow/tutorials/quickstart.html) flows, each of which calls the mock API service: one using an async call, the other a sync call;
+- A set of bash and [Docker Compose](https://docs.docker.com/compose/) scripts to build and run each of the above services;
+- A script to run [Locust](https://locust.io/) jobs to measure the scalability of each of the PF flow services.
+
+# Contents
+
+```
+├── README.md (this README file)
+├── makefile (Makefile with the commands to build and run tests)
+├── mock_api (a mock API service which simply waits before returning JSON)
+│ ├── Dockerfile
+│ ├── main.py
+│ └── requirements.txt
+├── pf_flows (various PromptFlow flows which call the mock API service using sync/async HTTP calls)
+│ ├── flex_async (async flexflow example)
+│ │ ├── flow.flex.yaml
+│ │ ├── flow.py
+│ │ └── requirements.txt
+│ ├── static_async (async static DAG example)
+│ │ ├── chat.py
+│ │ ├── flow.dag.yaml
+│ │ └── requirements.txt
+│ └── static_sync (sync static DAG example)
+│ ├── chat.py
+│ ├── flow.dag.yaml
+│ └── requirements.txt
+├── requirements.txt (pip requirements for developing the tests)
+└── test_runner (scripts to perform scalability tests against each of the PF flows)
+    ├── locust-results (this is where the locust results will be stored)
+ ├── build.sh (builds the docker images for each of the services above)
+ ├── docker-compose.yml (manages starting up all the docker-based services)
+ ├── mock_locustfile.py (locust test spec for testing the capacity of the mock API service)
+ ├── pf_locustfile.py (locust test spec for testing the capacity of the PF flow services)
+ ├── run_locust.sh (locust runner used in the tests)
+ ├── settings.env (env file with the configuration used in the tests)
+ └── test.sh (orchestrates running the tests)
+```
+
+# Preparing the environment
+
+## Prerequisites
+
+### Software
+
+Build the provided devcontainer and use it for running tests.
+
+### Hardware
+
+A host machine with at least 8 vCPU threads.
+
+## Building the services
+
+- `make install-requirements`
+- `make build`
+
+`make build` runs `test_runner/build.sh`, which visits each of the service directories (`mock_api`, `pf_flows/flex_async`, `pf_flows/static_async`, and `pf_flows/static_sync`) and creates a Docker image for each.
+
+Once this is complete, you can verify the services were built with `docker image ls`, for example:
+```
+REPOSITORY TAG IMAGE ID CREATED SIZE
+fastapi-wait-service latest 6bc9152b6b9b 32 minutes ago 184MB
+pf-flex-async-service latest d14cc15f45ad 33 minutes ago 1.58GB
+pf-static-sync-service latest 8b5ac2dac32c 34 minutes ago 1.58GB
+pf-static-async-service latest ff2968d3ef11 34 minutes ago 1.58GB
+```
+
+To test each of the services, you can try:
+- Mock API service: `curl "http://localhost:50001/"`
+- Static DAG async PF service: `curl --request POST 'http://localhost:8081/score' --header 'Content-Type: application/json' --data '{"question": "Test question", "chat_history": []}'`
+- Static DAG sync PF service: `curl --request POST 'http://localhost:8082/score' --header 'Content-Type: application/json' --data '{"question": "Test question", "chat_history": []}'`
+- FlexFlow async PF service: `curl --request POST 'http://localhost:8083/score' --header 'Content-Type: application/json' --data '{"question": "Test question", "chat_history": []}'`
+
+## Running each of the throughput tests
+
+The mock API service simply waits every time a request is made, and returns JSON after the wait has ended. The wait time is configurable through the `MIN_WAIT_TIME_SEC` and `MAX_WAIT_TIME_SEC` environment variables, and is set to 1 second in the Docker Compose script.
+
+To measure the throughput and latency of PF flows that call this service, we first need to establish a baseline for the mock service itself. Once we have this, we would expect all PF flows to show the same or similar throughput and latency, because all they are programmed to do is call this service and return.
+
+The `benchmark/promptflow-serve/makefile` supports four tests:
+- `make test-mock`: Run the throughput tests on the mock API service to determine a baseline.
+- `make test-staticsync`: Run the throughput tests on the PF static sync DAG flow service.
+- `make test-staticasync`: Run the throughput tests on the PF static async DAG flow service.
+- `make test-flexasync`: Run the throughput tests on the PF flex flow async service.
+
+## Test parameters
+
+The test parameters can be controlled in the `benchmark/promptflow-serve/test_runner/settings.env` file.
+
+## Results
+
+The results are stored in the `benchmark/promptflow-serve/test_runner/locust-results` folder. This includes interactive HTML reports which present the results as graphs.
diff --git a/benchmark/promptflow-serve/makefile b/benchmark/promptflow-serve/makefile
new file mode 100644
index 00000000000..50034aef9a8
--- /dev/null
+++ b/benchmark/promptflow-serve/makefile
@@ -0,0 +1,20 @@
+install-requirements:
+ pip install -r requirements.txt
+
+build:
+ cd test_runner && ./build.sh
+
+stop-all-tests:
+ cd test_runner && docker-compose down --remove-orphans
+
+test-mock:
+ cd test_runner && ./test.sh mock
+
+test-staticsync:
+ cd test_runner && ./test.sh staticsync
+
+test-staticasync:
+ cd test_runner && ./test.sh staticasync
+
+test-flexasync:
+ cd test_runner && ./test.sh flexasync
diff --git a/benchmark/promptflow-serve/mock_api/Dockerfile b/benchmark/promptflow-serve/mock_api/Dockerfile
new file mode 100644
index 00000000000..9fd69bb4cdd
--- /dev/null
+++ b/benchmark/promptflow-serve/mock_api/Dockerfile
@@ -0,0 +1,10 @@
+FROM python:3.10-slim
+
+WORKDIR /app
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+
+EXPOSE 50001
+
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "50001"]
diff --git a/benchmark/promptflow-serve/mock_api/main.py b/benchmark/promptflow-serve/mock_api/main.py
new file mode 100644
index 00000000000..e28a958fcfa
--- /dev/null
+++ b/benchmark/promptflow-serve/mock_api/main.py
@@ -0,0 +1,26 @@
+import asyncio
+import os
+import random
+
+from fastapi import FastAPI
+
+random.seed(42)
+app = FastAPI()
+
+
+@app.get("/")
+async def wait_and_return():
+    """Sleep for a random time between the configured bounds, then report the wait as JSON."""
+    min_wait_time_sec = int(os.getenv("MIN_WAIT_TIME_SEC", 1))
+    max_wait_time_sec = int(os.getenv("MAX_WAIT_TIME_SEC", 5))
+    # NOTE: both bounds are re-read on every request and parsed with int(), so "0.5" raises ValueError.
+    # draw a uniformly distributed float between min and max and sleep for that many seconds.
+    random_float = random.uniform(min_wait_time_sec, max_wait_time_sec)
+    await asyncio.sleep(random_float)
+
+    # return a message to say just how long the service waited for
+    return {
+        "total_time_sec": random_float,
+        "min_wait_time_sec": min_wait_time_sec,
+        "max_wait_time_sec": max_wait_time_sec,
+    }
diff --git a/benchmark/promptflow-serve/mock_api/requirements.txt b/benchmark/promptflow-serve/mock_api/requirements.txt
new file mode 100644
index 00000000000..34263d1668d
--- /dev/null
+++ b/benchmark/promptflow-serve/mock_api/requirements.txt
@@ -0,0 +1,2 @@
+fastapi==0.111.0
+uvicorn==0.30.1
\ No newline at end of file
diff --git a/benchmark/promptflow-serve/pf_flows/flex_async/flow.flex.yaml b/benchmark/promptflow-serve/pf_flows/flex_async/flow.flex.yaml
new file mode 100644
index 00000000000..6a8b154643b
--- /dev/null
+++ b/benchmark/promptflow-serve/pf_flows/flex_async/flow.flex.yaml
@@ -0,0 +1,5 @@
+$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json
+entry: flow:ChatFlow
+environment:
+ # image: mcr.microsoft.com/azureml/promptflow/promptflow-python
+ python_requirements_txt: requirements.txt
\ No newline at end of file
diff --git a/benchmark/promptflow-serve/pf_flows/flex_async/flow.py b/benchmark/promptflow-serve/pf_flows/flex_async/flow.py
new file mode 100644
index 00000000000..b2be1daad7a
--- /dev/null
+++ b/benchmark/promptflow-serve/pf_flows/flex_async/flow.py
@@ -0,0 +1,60 @@
+import asyncio
+import os
+import time
+from pathlib import Path
+
+import aiohttp
+
+BASE_DIR = Path(__file__).absolute().parent
+
+
+class ChatFlow:
+    def __init__(self):
+        pass
+
+    async def __call__(self, question: str, chat_history: list) -> str:  # noqa: B006
+        """Await three Nodes concurrently, then return the mock API's JSON plus timing fields."""
+        node_instance1 = Node()
+        node_instance2 = Node()
+        node_instance3 = Node()
+
+        # build one coroutine per node; nothing runs until they are handed to gather() below
+        tasks = [
+            self.call_node(node_instance1),
+            self.call_node(node_instance2),
+            self.call_node(node_instance3)
+        ]
+
+        # run the three node calls concurrently and wait for all of them to finish
+        await asyncio.gather(*tasks)
+        # NOTE(review): chat_history is normalised below but never read again in this method.
+        chat_history = chat_history or []
+        start_time = time.time()
+
+        # call the mock endpoint; the timer was started after the node calls, so pf_node_time_sec excludes them
+        url = os.getenv("MOCK_API_ENDPOINT", None)
+        if url is None:
+            raise RuntimeError("Failed to read MOCK_API_ENDPOINT env var.")
+        # NOTE(review): the decoded JSON object is returned although the signature says "-> str".
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url) as response:
+                if response.status == 200:
+                    response_dict = await response.json()
+                    end_time = time.time()
+                    response_dict["pf_node_time_sec"] = end_time - start_time
+                    response_dict["type"] = "pf_flex_async"
+                    return response_dict
+                else:
+                    raise RuntimeError(f"Failed call to {url}: {response.status}")
+
+    async def call_node(self, node_instance: any):  # NOTE(review): builtin any, not typing.Any
+        await node_instance()
+
+
+class Node:
+    def __init__(self):
+        pass
+
+    async def __call__(self) -> str:  # noqa: B006
+        await asyncio.sleep(0.25)  # 250 ms pause, the same delay the static DAG nodes use
+        return "completed"
diff --git a/benchmark/promptflow-serve/pf_flows/flex_async/requirements.txt b/benchmark/promptflow-serve/pf_flows/flex_async/requirements.txt
new file mode 100644
index 00000000000..1c8b5d9ad63
--- /dev/null
+++ b/benchmark/promptflow-serve/pf_flows/flex_async/requirements.txt
@@ -0,0 +1,6 @@
+promptflow==1.12.0
+promptflow[azure]==1.12.0
+promptflow-tools==1.4.0
+fastapi==0.111.0
+uvicorn==0.30.1
+aiohttp==3.9.5
\ No newline at end of file
diff --git a/benchmark/promptflow-serve/pf_flows/static_async/chat.py b/benchmark/promptflow-serve/pf_flows/static_async/chat.py
new file mode 100644
index 00000000000..f83ab3d8e64
--- /dev/null
+++ b/benchmark/promptflow-serve/pf_flows/static_async/chat.py
@@ -0,0 +1,27 @@
+import os
+import time
+
+import aiohttp
+from promptflow.core import tool
+
+
+@tool
+async def my_python_tool(node1: str, node2: str, node3: str) -> str:
+    """GET MOCK_API_ENDPOINT and return its JSON with "pf_node_time_sec" and "type" added."""
+    start_time = time.time()
+    # NOTE(review): node1..node3 are never read here; flow.dag.yaml binds them to the upstream node outputs.
+    # the mock endpoint URL comes from the environment; a missing value or non-200 reply raises RuntimeError
+    url = os.getenv("MOCK_API_ENDPOINT", None)
+    if url is None:
+        raise RuntimeError("Failed to read MOCK_API_ENDPOINT env var.")
+    # NOTE(review): the decoded JSON object is returned although the signature says "-> str".
+    async with aiohttp.ClientSession() as session:
+        async with session.get(url) as response:
+            if response.status == 200:
+                response_dict = await response.json()
+                end_time = time.time()
+                response_dict["pf_node_time_sec"] = end_time - start_time
+                response_dict["type"] = "pf_dag_async"
+                return response_dict
+            else:
+                raise RuntimeError(f"Failed call to {url}: {response.status}")
diff --git a/benchmark/promptflow-serve/pf_flows/static_async/flow.dag.yaml b/benchmark/promptflow-serve/pf_flows/static_async/flow.dag.yaml
new file mode 100644
index 00000000000..ed5cf5d3b50
--- /dev/null
+++ b/benchmark/promptflow-serve/pf_flows/static_async/flow.dag.yaml
@@ -0,0 +1,50 @@
+$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json
+inputs:
+ chat_history:
+ type: list
+ default: []
+ question:
+ type: string
+ is_chat_input: true
+ default: What is ChatGPT?
+outputs:
+ answer:
+ type: string
+ reference: ${chat.output}
+ is_chat_output: true
+nodes:
+- name: node1
+ type: python
+ inputs:
+ chat_history: ${inputs.chat_history}
+ question: ${inputs.question}
+ source:
+ type: code
+ path: node1.py
+- name: node2
+ type: python
+ inputs:
+ chat_history: ${inputs.chat_history}
+ question: ${inputs.question}
+ source:
+ type: code
+ path: node2.py
+- name: node3
+ type: python
+ inputs:
+ chat_history: ${inputs.chat_history}
+ question: ${inputs.question}
+ source:
+ type: code
+ path: node3.py
+- name: chat
+ type: python
+ inputs:
+ node1: ${node1.output}
+ node2: ${node2.output}
+ node3: ${node3.output}
+ source:
+ type: code
+ path: chat.py
+environment:
+ python_requirements_txt: requirements.txt
diff --git a/benchmark/promptflow-serve/pf_flows/static_async/node1.py b/benchmark/promptflow-serve/pf_flows/static_async/node1.py
new file mode 100644
index 00000000000..a72fdcd2e28
--- /dev/null
+++ b/benchmark/promptflow-serve/pf_flows/static_async/node1.py
@@ -0,0 +1,10 @@
+import asyncio
+from promptflow.core import tool
+
+
+@tool
+async def my_python_tool(chat_history: list, question: str) -> str:
+    """Placeholder node: ignores both inputs, sleeps 250 ms asynchronously and returns "completed"."""
+    # await a 250 ms sleep to simulate an asynchronous OpenAI call
+    await asyncio.sleep(0.25)
+    return "completed"
diff --git a/benchmark/promptflow-serve/pf_flows/static_async/node2.py b/benchmark/promptflow-serve/pf_flows/static_async/node2.py
new file mode 100644
index 00000000000..a72fdcd2e28
--- /dev/null
+++ b/benchmark/promptflow-serve/pf_flows/static_async/node2.py
@@ -0,0 +1,10 @@
+import asyncio
+from promptflow.core import tool
+
+
+@tool
+async def my_python_tool(chat_history: list, question: str) -> str:
+    """Placeholder node: ignores both inputs, sleeps 250 ms asynchronously and returns "completed"."""
+    # await a 250 ms sleep to simulate an asynchronous OpenAI call
+    await asyncio.sleep(0.25)
+    return "completed"
diff --git a/benchmark/promptflow-serve/pf_flows/static_async/node3.py b/benchmark/promptflow-serve/pf_flows/static_async/node3.py
new file mode 100644
index 00000000000..a72fdcd2e28
--- /dev/null
+++ b/benchmark/promptflow-serve/pf_flows/static_async/node3.py
@@ -0,0 +1,10 @@
+import asyncio
+from promptflow.core import tool
+
+
+@tool
+async def my_python_tool(chat_history: list, question: str) -> str:
+    """Placeholder node: ignores both inputs, sleeps 250 ms asynchronously and returns "completed"."""
+    # await a 250 ms sleep to simulate an asynchronous OpenAI call
+    await asyncio.sleep(0.25)
+    return "completed"
diff --git a/benchmark/promptflow-serve/pf_flows/static_async/requirements.txt b/benchmark/promptflow-serve/pf_flows/static_async/requirements.txt
new file mode 100644
index 00000000000..1c8b5d9ad63
--- /dev/null
+++ b/benchmark/promptflow-serve/pf_flows/static_async/requirements.txt
@@ -0,0 +1,6 @@
+promptflow==1.12.0
+promptflow[azure]==1.12.0
+promptflow-tools==1.4.0
+fastapi==0.111.0
+uvicorn==0.30.1
+aiohttp==3.9.5
\ No newline at end of file
diff --git a/benchmark/promptflow-serve/pf_flows/static_sync/chat.py b/benchmark/promptflow-serve/pf_flows/static_sync/chat.py
new file mode 100644
index 00000000000..c5e9429f148
--- /dev/null
+++ b/benchmark/promptflow-serve/pf_flows/static_sync/chat.py
@@ -0,0 +1,27 @@
+import os
+import time
+
+import requests
+from promptflow.core import tool
+
+
+@tool
+def my_python_tool(node1: str, node2: str, node3: str) -> str:
+    """GET MOCK_API_ENDPOINT synchronously and return its JSON with "pf_node_time_sec" and "type" added."""
+    start_time = time.time()
+    # NOTE(review): node1..node3 are never read here; flow.dag.yaml binds them to the upstream node outputs.
+    # the mock endpoint URL comes from the environment; a missing value or non-200 reply raises RuntimeError
+    url = os.getenv("MOCK_API_ENDPOINT", None)
+    if url is None:
+        raise RuntimeError("Failed to read MOCK_API_ENDPOINT env var.")
+    # NOTE(review): requests.get() is called without a timeout, so a stalled mock service blocks this call.
+    # on 200, return the decoded JSON plus timing (NOTE(review): the signature says "-> str")
+    response = requests.get(url)
+    if response.status_code == 200:
+        response_dict = response.json()
+        end_time = time.time()
+        response_dict["pf_node_time_sec"] = end_time - start_time
+        response_dict["type"] = "pf_dag_sync"
+        return response_dict
+    else:
+        raise RuntimeError(f"Failed call to {url}: {response.status_code}")
diff --git a/benchmark/promptflow-serve/pf_flows/static_sync/flow.dag.yaml b/benchmark/promptflow-serve/pf_flows/static_sync/flow.dag.yaml
new file mode 100644
index 00000000000..ed5cf5d3b50
--- /dev/null
+++ b/benchmark/promptflow-serve/pf_flows/static_sync/flow.dag.yaml
@@ -0,0 +1,50 @@
+$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json
+inputs:
+ chat_history:
+ type: list
+ default: []
+ question:
+ type: string
+ is_chat_input: true
+ default: What is ChatGPT?
+outputs:
+ answer:
+ type: string
+ reference: ${chat.output}
+ is_chat_output: true
+nodes:
+- name: node1
+ type: python
+ inputs:
+ chat_history: ${inputs.chat_history}
+ question: ${inputs.question}
+ source:
+ type: code
+ path: node1.py
+- name: node2
+ type: python
+ inputs:
+ chat_history: ${inputs.chat_history}
+ question: ${inputs.question}
+ source:
+ type: code
+ path: node2.py
+- name: node3
+ type: python
+ inputs:
+ chat_history: ${inputs.chat_history}
+ question: ${inputs.question}
+ source:
+ type: code
+ path: node3.py
+- name: chat
+ type: python
+ inputs:
+ node1: ${node1.output}
+ node2: ${node2.output}
+ node3: ${node3.output}
+ source:
+ type: code
+ path: chat.py
+environment:
+ python_requirements_txt: requirements.txt
diff --git a/benchmark/promptflow-serve/pf_flows/static_sync/node1.py b/benchmark/promptflow-serve/pf_flows/static_sync/node1.py
new file mode 100644
index 00000000000..84920acdffe
--- /dev/null
+++ b/benchmark/promptflow-serve/pf_flows/static_sync/node1.py
@@ -0,0 +1,10 @@
+import time
+from promptflow.core import tool
+
+
+@tool
+def my_python_tool(chat_history: list, question: str) -> str:
+    """Placeholder node: ignores both inputs, blocks for 250 ms and returns "completed"."""
+    # block the calling thread for 250 ms to simulate a synchronous OpenAI call
+    time.sleep(0.25)
+    return "completed"
diff --git a/benchmark/promptflow-serve/pf_flows/static_sync/node2.py b/benchmark/promptflow-serve/pf_flows/static_sync/node2.py
new file mode 100644
index 00000000000..84920acdffe
--- /dev/null
+++ b/benchmark/promptflow-serve/pf_flows/static_sync/node2.py
@@ -0,0 +1,10 @@
+import time
+from promptflow.core import tool
+
+
+@tool
+def my_python_tool(chat_history: list, question: str) -> str:
+    """Placeholder node: ignores both inputs, blocks for 250 ms and returns "completed"."""
+    # block the calling thread for 250 ms to simulate a synchronous OpenAI call
+    time.sleep(0.25)
+    return "completed"
diff --git a/benchmark/promptflow-serve/pf_flows/static_sync/node3.py b/benchmark/promptflow-serve/pf_flows/static_sync/node3.py
new file mode 100644
index 00000000000..84920acdffe
--- /dev/null
+++ b/benchmark/promptflow-serve/pf_flows/static_sync/node3.py
@@ -0,0 +1,10 @@
+import time
+from promptflow.core import tool
+
+
+@tool
+def my_python_tool(chat_history: list, question: str) -> str:
+    """Placeholder node: ignores both inputs, blocks for 250 ms and returns "completed"."""
+    # block the calling thread for 250 ms to simulate a synchronous OpenAI call
+    time.sleep(0.25)
+    return "completed"
diff --git a/benchmark/promptflow-serve/pf_flows/static_sync/requirements.txt b/benchmark/promptflow-serve/pf_flows/static_sync/requirements.txt
new file mode 100644
index 00000000000..e5f2921d500
--- /dev/null
+++ b/benchmark/promptflow-serve/pf_flows/static_sync/requirements.txt
@@ -0,0 +1,5 @@
+promptflow==1.12.0
+promptflow[azure]==1.12.0
+promptflow-tools==1.4.0
+fastapi==0.111.0
+uvicorn==0.30.1
\ No newline at end of file
diff --git a/benchmark/promptflow-serve/requirements.txt b/benchmark/promptflow-serve/requirements.txt
new file mode 100644
index 00000000000..31116cb3119
--- /dev/null
+++ b/benchmark/promptflow-serve/requirements.txt
@@ -0,0 +1,6 @@
+promptflow==1.12.0
+promptflow[azure]==1.12.0
+promptflow-tools==1.4.0
+fastapi==0.111.0
+uvicorn==0.30.1
+aiohttp==3.9.5
diff --git a/benchmark/promptflow-serve/result-archive/v1.12.0/README.md b/benchmark/promptflow-serve/result-archive/v1.12.0/README.md
new file mode 100644
index 00000000000..ed6f20a8dcc
--- /dev/null
+++ b/benchmark/promptflow-serve/result-archive/v1.12.0/README.md
@@ -0,0 +1,37 @@
+# PromptFlow Serve v1.12.0
+
+## Config
+
+```text
+USERS=500
+HATCH_RATE=50
+RUN_TIME=60s
+PROMPTFLOW_WORKER_NUM=8
+PROMPTFLOW_WORKER_THREADS=4
+```
+## Flask + Sync Function Nodes
+
+| Type | Name | Request Count | Failure Count | Median Response Time | Average Response Time | Min Response Time | Max Response Time | Average Content Size | Requests/s | Failures/s | 50% | 66% | 75% | 80% | 90% | 95% | 98% | 99% | 99.9% | 99.99% | 100% |
+|------|------|---------------|---------------|---------------------|-----------------------|-------------------|-------------------|----------------------|-------------|-------------|-----|-----|-----|-----|-----|-----|-----|-----|-------|--------|------|
+| POST | //score | 2432 | 1 | 5800.0 | 8065.454250779605 | 1.944665999872086 | 25289.052397999967 | 136.6953125 | 41.17208026926904 | 0.016929309321245492 | 5800 | 10000 | 14000 | 15000 | 21000 | 25000 | 25000 | 25000 | 25000 | 25000 | 25000 |
+| | Aggregated | 2432 | 1 | 5800.0 | 8065.454250779605 | 1.944665999872086 | 25289.052397999967 | 136.6953125 | 41.17208026926904 | 0.016929309321245492 | 5800 | 10000 | 14000 | 15000 | 21000 | 25000 | 25000 | 25000 | 25000 | 25000 | 25000 |
+
+![Flask + Sync Function Nodes](./flask_sync.png)
+
+## FastAPI + Async Function Nodes
+
+| Type | Name | Request Count | Failure Count | Median Response Time | Average Response Time | Min Response Time | Max Response Time | Average Content Size | Requests/s | Failures/s | 50% | 66% | 75% | 80% | 90% | 95% | 98% | 99% | 99.9% | 99.99% | 100% |
+|------|------|---------------|---------------|---------------------|-----------------------|-------------------|-------------------|----------------------|-------------|-------------|-----|-----|-----|-----|-----|-----|-----|-----|-------|--------|------|
+| POST | //score | 9816 | 5 | 1300.0 | 1286.097093404957 | 1.1461390004114946 | 1674.347330999808 | 136.6794009779951 | 166.23424865808806 | 0.08467514703447843 | 1300 | 1300 | 1300 | 1300 | 1300 | 1400 | 1500 | 1500 | 1600 | 1700 | 1700 |
+| | Aggregated | 9816 | 5 | 1300.0 | 1286.097093404957 | 1.1461390004114946 | 1674.347330999808 | 136.6794009779951 | 166.23424865808806 | 0.08467514703447843 | 1300 | 1300 | 1300 | 1300 | 1300 | 1400 | 1500 | 1500 | 1600 | 1700 | 1700 |
+
+![FastAPI + Async Function Nodes](./fastapi_async.png)
+
+## FastAPI + Flex Async Function Nodes
+
+| Type | Name | Request Count | Failure Count | Median Response Time | Average Response Time | Min Response Time | Max Response Time | Average Content Size | Requests/s | Failures/s | 50% | 66% | 75% | 80% | 90% | 95% | 98% | 99% | 99.9% | 99.99% | 100% |
+|------|------|---------------|---------------|---------------------|-----------------------|-------------------|-------------------|----------------------|-------------|-------------|-----|-----|-----|-----|-----|-----|-----|-----|-------|--------|------|
+| POST | //score | 9840 | 6 | 1300.0 | 1283.105743044001 | 0.6390180005837465 | 1894.6711440003128 | 126.67418699186992 | 166.46825073381316 | 0.10150503093525194 | 1300 | 1300 | 1300 | 1300 | 1300 | 1400 | 1500 | 1500 | 1800 | 1900 | 1900 |
+| | Aggregated | 9840 | 6 | 1300.0 | 1283.105743044001 | 0.6390180005837465 | 1894.6711440003128 | 126.67418699186992 | 166.46825073381316 | 0.10150503093525194 | 1300 | 1300 | 1300 | 1300 | 1300 | 1400 | 1500 | 1500 | 1800 | 1900 | 1900 |
+
+![FastApi + Flex Async Function Nodes](./fastapi-flex-async.png)
\ No newline at end of file
diff --git a/benchmark/promptflow-serve/result-archive/v1.12.0/fastapi-flex-async.png b/benchmark/promptflow-serve/result-archive/v1.12.0/fastapi-flex-async.png
new file mode 100644
index 00000000000..4bc78755e5c
Binary files /dev/null and b/benchmark/promptflow-serve/result-archive/v1.12.0/fastapi-flex-async.png differ
diff --git a/benchmark/promptflow-serve/result-archive/v1.12.0/fastapi_async.png b/benchmark/promptflow-serve/result-archive/v1.12.0/fastapi_async.png
new file mode 100644
index 00000000000..54d53764d8a
Binary files /dev/null and b/benchmark/promptflow-serve/result-archive/v1.12.0/fastapi_async.png differ
diff --git a/benchmark/promptflow-serve/result-archive/v1.12.0/flask_sync.png b/benchmark/promptflow-serve/result-archive/v1.12.0/flask_sync.png
new file mode 100644
index 00000000000..cc7cee9dbb5
Binary files /dev/null and b/benchmark/promptflow-serve/result-archive/v1.12.0/flask_sync.png differ
diff --git a/benchmark/promptflow-serve/result-archive/v1.12.0/pf_flex_async_report_u500_h50.html b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_flex_async_report_u500_h50.html
new file mode 100644
index 00000000000..ec008bcffc4
--- /dev/null
+++ b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_flex_async_report_u500_h50.html
@@ -0,0 +1,277 @@
+
+
+
+
+
+
+
+
+ Locust
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/benchmark/promptflow-serve/result-archive/v1.12.0/pf_flex_async_report_u500_h50_exceptions.csv b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_flex_async_report_u500_h50_exceptions.csv
new file mode 100644
index 00000000000..e98950c38f5
--- /dev/null
+++ b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_flex_async_report_u500_h50_exceptions.csv
@@ -0,0 +1,13 @@
+Count,Message,Traceback,Nodes
+6,Expecting value: line 1 column 1 (char 0)," File ""/opt/venv/lib/python3.11/site-packages/locust/user/task.py"", line 340, in run
+ self.execute_next_task()
+ File ""/opt/venv/lib/python3.11/site-packages/locust/user/task.py"", line 373, in execute_next_task
+ self.execute_task(self._task_queue.popleft())
+ File ""/opt/venv/lib/python3.11/site-packages/locust/user/task.py"", line 385, in execute_task
+ task(self)
+ File ""/mnt/locust/pf_locustfile.py"", line 8, in test_endpoint
+ print(response.status_code, response.elapsed.total_seconds(), response.json())
+ ^^^^^^^^^^^^^^^
+ File ""/opt/venv/lib/python3.11/site-packages/requests/models.py"", line 978, in json
+ raise RequestsJSONDecodeError(e.msg, e.doc, e.pos)
+",local
diff --git a/benchmark/promptflow-serve/result-archive/v1.12.0/pf_flex_async_report_u500_h50_failures.csv b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_flex_async_report_u500_h50_failures.csv
new file mode 100644
index 00000000000..2e0b5c49bb2
--- /dev/null
+++ b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_flex_async_report_u500_h50_failures.csv
@@ -0,0 +1,3 @@
+Method,Name,Error,Occurrences
+POST,//score,"ConnectionResetError(104, 'Connection reset by peer')",2
+POST,//score,RemoteDisconnected('Remote end closed connection without response'),4
diff --git a/benchmark/promptflow-serve/result-archive/v1.12.0/pf_flex_async_report_u500_h50_stats.csv b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_flex_async_report_u500_h50_stats.csv
new file mode 100644
index 00000000000..bb43a035e13
--- /dev/null
+++ b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_flex_async_report_u500_h50_stats.csv
@@ -0,0 +1,3 @@
+Type,Name,Request Count,Failure Count,Median Response Time,Average Response Time,Min Response Time,Max Response Time,Average Content Size,Requests/s,Failures/s,50%,66%,75%,80%,90%,95%,98%,99%,99.9%,99.99%,100%
+POST,//score,9840,6,1300.0,1283.105743044001,0.6390180005837465,1894.6711440003128,126.67418699186992,166.46825073381316,0.10150503093525194,1300,1300,1300,1300,1300,1400,1500,1500,1800,1900,1900
+,Aggregated,9840,6,1300.0,1283.105743044001,0.6390180005837465,1894.6711440003128,126.67418699186992,166.46825073381316,0.10150503093525194,1300,1300,1300,1300,1300,1400,1500,1500,1800,1900,1900
diff --git a/benchmark/promptflow-serve/result-archive/v1.12.0/pf_flex_async_report_u500_h50_stats_history.csv b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_flex_async_report_u500_h50_stats_history.csv
new file mode 100644
index 00000000000..be2174df37c
--- /dev/null
+++ b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_flex_async_report_u500_h50_stats_history.csv
@@ -0,0 +1,57 @@
+Timestamp,User Count,Type,Name,Requests/s,Failures/s,50%,66%,75%,80%,90%,95%,98%,99%,99.9%,99.99%,100%,Total Request Count,Total Failure Count,Total Median Response Time,Total Average Response Time,Total Min Response Time,Total Max Response Time,Total Average Content Size
+1719476201,0,,Aggregated,0.000000,0.000000,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0.0,0,0,0
+1719476202,50,,Aggregated,0.000000,0.000000,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0.0,0,0,0
+1719476203,100,,Aggregated,0.000000,0.000000,1400,1400,1400,1400,1400,1400,1400,1400,1400,1400,1400,50,0,1400.0,1359.9001552000482,1314.4047840005442,1408.9546380000684,126.78
+1719476204,150,,Aggregated,0.000000,0.000000,1400,1400,1400,1400,1400,1400,1400,1400,1400,1400,1400,100,0,1400.0,1367.905144330007,1314.4047840005442,1415.7905369993387,126.78
+1719476205,200,,Aggregated,0.000000,0.000000,1400,1400,1400,1400,1400,1400,1400,1400,1400,1400,1400,166,0,1400.0,1369.913189391568,1257.3883230006686,1424.7157970003173,126.72289156626506
+1719476206,250,,Aggregated,16.666667,0.000000,1400,1400,1400,1400,1400,1400,1400,1400,1400,1400,1400,269,0,1400.0,1353.9404828512945,1257.3883230006686,1434.7095880002598,126.75836431226766
+1719476207,300,,Aggregated,25.000000,0.000000,1400,1400,1400,1400,1400,1400,1400,1400,1400,1400,1400,368,0,1400.0,1346.1709259184825,1257.3883230006686,1440.3719530000672,126.7336956521739
+1719476208,350,,Aggregated,35.800000,0.000000,1300,1400,1400,1400,1400,1400,1500,1500,1500,1500,1500,492,0,1300.0,1336.6668061300836,1257.1295059997283,1476.6103769998153,126.73577235772358
+1719476209,400,,Aggregated,46.500000,0.000000,1300,1400,1400,1400,1400,1400,1500,1500,1500,1500,1500,652,0,1300.0,1330.1356918681076,1256.809014999817,1489.0694969999458,126.75613496932516
+1719476210,450,,Aggregated,54.714286,0.000000,1300,1300,1400,1400,1400,1400,1400,1500,1500,1500,1500,809,1,1300.0,1319.8737024660204,0.6390180005837465,1489.0694969999458,126.61310259579729
+1719476211,500,,Aggregated,63.875000,0.000000,1300,1300,1400,1400,1400,1400,1400,1500,1500,1500,1500,985,1,1300.0,1317.0985490172702,0.6390180005837465,1489.0694969999458,126.63553299492386
+1719476212,500,,Aggregated,74.555556,0.000000,1300,1300,1400,1400,1400,1400,1400,1500,1500,1500,1500,1195,1,1300.0,1314.0434353389132,0.6390180005837465,1489.0694969999458,126.65941422594142
+1719476213,500,,Aggregated,83.400000,0.100000,1300,1300,1400,1400,1400,1400,1400,1500,1500,1500,1500,1350,1,1300.0,1309.4179583118487,0.6390180005837465,1489.0694969999458,126.66740740740741
+1719476214,500,,Aggregated,101.400000,0.100000,1300,1300,1300,1400,1400,1400,1400,1400,1500,1500,1500,1536,2,1300.0,1304.10273893945,0.6390180005837465,1489.0694969999458,126.59895833333333
+1719476215,500,,Aggregated,122.600000,0.100000,1300,1300,1300,1400,1400,1400,1400,1400,1500,1500,1500,1726,2,1300.0,1299.6914699125073,0.6390180005837465,1489.0694969999458,126.61181923522595
+1719476216,500,,Aggregated,133.400000,0.100000,1300,1300,1300,1300,1400,1400,1400,1400,1500,1500,1500,1902,2,1300.0,1296.100025674543,0.6390180005837465,1489.0694969999458,126.6267087276551
+1719476217,500,,Aggregated,148.000000,0.200000,1300,1300,1300,1300,1400,1400,1400,1400,1500,1500,1500,2085,2,1300.0,1293.0271883414814,0.6390180005837465,1489.0694969999458,126.6335731414868
+1719476218,500,,Aggregated,158.600000,0.200000,1300,1300,1300,1300,1400,1400,1400,1400,1500,1500,1500,2261,2,1300.0,1291.1167783286126,0.6390180005837465,1489.0694969999458,126.63998230871296
+1719476219,500,,Aggregated,165.000000,0.200000,1300,1300,1300,1300,1400,1400,1400,1400,1500,1500,1500,2445,2,1300.0,1289.0835896257645,0.6390180005837465,1489.0694969999458,126.6482617586912
+1719476220,500,,Aggregated,175.100000,0.200000,1300,1300,1300,1300,1400,1400,1400,1400,1500,1500,1500,2631,2,1300.0,1287.5369827635839,0.6390180005837465,1489.0694969999458,126.65526415811479
+1719476221,500,,Aggregated,178.700000,0.200000,1300,1300,1300,1300,1400,1400,1400,1400,1500,1500,1500,2805,2,1300.0,1285.9677186228118,0.6390180005837465,1489.0694969999458,126.65882352941176
+1719476222,500,,Aggregated,181.300000,0.200000,1300,1300,1300,1300,1400,1400,1400,1400,1500,1500,1500,2991,2,1300.0,1284.4150023503792,0.6390180005837465,1489.0694969999458,126.66232029421599
+1719476223,500,,Aggregated,184.700000,0.100000,1300,1300,1300,1300,1400,1400,1400,1400,1500,1500,1500,3176,2,1300.0,1283.0922763513804,0.6390180005837465,1489.0694969999458,126.66404282115869
+1719476224,500,,Aggregated,182.200000,0.100000,1300,1300,1300,1300,1400,1400,1400,1400,1500,1500,1500,3353,2,1300.0,1282.3481002889896,0.6390180005837465,1489.0694969999458,126.67163733969579
+1719476225,500,,Aggregated,180.500000,0.100000,1300,1300,1300,1300,1400,1400,1400,1400,1500,1500,1500,3550,2,1300.0,1281.4475179583014,0.6390180005837465,1489.0694969999458,126.67605633802818
+1719476226,500,,Aggregated,182.500000,0.100000,1300,1300,1300,1300,1300,1400,1400,1400,1500,1500,1500,3712,2,1300.0,1280.830726687491,0.6390180005837465,1489.0694969999458,126.68318965517241
+1719476227,500,,Aggregated,181.000000,0.000000,1300,1300,1300,1300,1300,1400,1400,1400,1500,1500,1500,3891,2,1300.0,1280.0413535980374,0.6390180005837465,1489.0694969999458,126.68851195065535
+1719476228,500,,Aggregated,181.400000,0.000000,1300,1300,1300,1300,1300,1400,1400,1400,1500,1500,1500,4081,2,1300.0,1279.51105992354,0.6390180005837465,1489.0694969999458,126.691987258025
+1719476229,500,,Aggregated,182.000000,0.000000,1300,1300,1300,1300,1300,1400,1400,1400,1500,1500,1500,4259,2,1300.0,1279.2214130856937,0.6390180005837465,1489.0694969999458,126.69499882601549
+1719476230,500,,Aggregated,179.300000,0.000000,1300,1300,1300,1300,1300,1400,1400,1400,1500,1500,1500,4436,2,1300.0,1278.5767649893974,0.6390180005837465,1489.0694969999458,126.69477006311993
+1719476231,500,,Aggregated,181.900000,0.000000,1300,1300,1300,1300,1300,1400,1400,1400,1500,1500,1500,4619,2,1300.0,1278.1484213225735,0.6390180005837465,1489.0694969999458,126.69733708594934
+1719476232,500,,Aggregated,180.800000,0.000000,1300,1300,1300,1300,1300,1400,1400,1400,1500,1500,1500,4800,2,1300.0,1277.9912853010353,0.6390180005837465,1489.0694969999458,126.69875
+1719476233,500,,Aggregated,178.000000,0.000000,1300,1300,1300,1300,1300,1400,1400,1400,1500,1500,1500,4966,4,1300.0,1277.7606078137276,0.6390180005837465,1497.1565209998516,126.65042287555377
+1719476234,500,,Aggregated,181.100000,0.000000,1300,1300,1300,1300,1300,1400,1400,1400,1500,1500,1500,5142,4,1300.0,1278.3526808589988,0.6390180005837465,1507.3813139997583,126.6534422403734
+1719476235,500,,Aggregated,180.400000,0.000000,1300,1300,1300,1300,1300,1400,1400,1400,1500,1600,1600,5345,4,1300.0,1279.3712435923235,0.6390180005837465,1593.6190920001536,126.6579981290926
+1719476236,500,,Aggregated,181.000000,0.200000,1300,1300,1300,1300,1300,1400,1400,1400,1500,1600,1600,5513,4,1300.0,1279.533954473239,0.6390180005837465,1593.6190920001536,126.66116452022493
+1719476237,500,,Aggregated,179.300000,0.200000,1300,1300,1300,1300,1300,1400,1400,1500,1700,1700,1700,5695,5,1300.0,1280.9238150517929,0.6390180005837465,1740.5036139998629,126.64179104477611
+1719476238,500,,Aggregated,179.300000,0.200000,1300,1300,1300,1300,1300,1400,1400,1500,1700,1800,1800,5866,5,1300.0,1282.5266233590119,0.6390180005837465,1781.4541899997494,126.64490282986704
+1719476239,500,,Aggregated,178.700000,0.200000,1300,1300,1300,1300,1300,1400,1400,1500,1800,1900,1900,6050,5,1300.0,1284.0742626437943,0.6390180005837465,1894.6711440003128,126.64793388429752
+1719476240,500,,Aggregated,179.500000,0.300000,1300,1300,1300,1300,1300,1400,1400,1500,1800,1900,1900,6219,5,1300.0,1285.1884838346946,0.6390180005837465,1894.6711440003128,126.64994372085545
+1719476241,500,,Aggregated,176.300000,0.300000,1300,1300,1300,1300,1300,1400,1500,1600,1800,1900,1900,6411,5,1300.0,1286.7778786733681,0.6390180005837465,1894.6711440003128,126.65387615036656
+1719476242,500,,Aggregated,178.900000,0.300000,1300,1300,1300,1300,1400,1400,1500,1600,1800,1900,1900,6574,5,1300.0,1287.8780452646733,0.6390180005837465,1894.6711440003128,126.65622147855187
+1719476243,500,,Aggregated,178.100000,0.300000,1300,1300,1300,1300,1400,1400,1500,1600,1800,1900,1900,6745,6,1300.0,1288.867500338763,0.6390180005837465,1894.6711440003128,126.63958487768717
+1719476244,500,,Aggregated,178.800000,0.300000,1300,1300,1300,1300,1400,1400,1500,1600,1800,1900,1900,6947,6,1300.0,1290.3303529794093,0.6390180005837465,1894.6711440003128,126.6422916366777
+1719476245,500,,Aggregated,177.700000,0.400000,1300,1300,1300,1300,1400,1400,1500,1600,1800,1900,1900,7121,6,1300.0,1289.7010438283894,0.6390180005837465,1894.6711440003128,126.64471282123297
+1719476246,500,,Aggregated,176.100000,0.200000,1300,1300,1300,1300,1400,1400,1500,1600,1800,1900,1900,7296,6,1300.0,1289.130111216139,0.6390180005837465,1894.6711440003128,126.64898574561404
+1719476247,500,,Aggregated,178.700000,0.200000,1300,1300,1300,1300,1400,1400,1500,1600,1800,1900,1900,7481,6,1300.0,1288.4846432680051,0.6390180005837465,1894.6711440003128,126.65084881700308
+1719476248,500,,Aggregated,178.200000,0.200000,1300,1300,1300,1300,1400,1400,1500,1600,1800,1900,1900,7666,6,1300.0,1287.8889019131163,0.6390180005837465,1894.6711440003128,126.65470910513957
+1719476249,500,,Aggregated,178.200000,0.200000,1300,1300,1300,1300,1400,1400,1500,1600,1800,1900,1900,7832,6,1300.0,1287.4213013797178,0.6390180005837465,1894.6711440003128,126.65640960163432
+1719476250,500,,Aggregated,178.800000,0.100000,1300,1300,1300,1300,1400,1400,1500,1600,1800,1900,1900,8032,6,1300.0,1286.9863334506908,0.6390180005837465,1894.6711440003128,126.65923804780877
+1719476251,500,,Aggregated,180.900000,0.100000,1300,1300,1300,1300,1400,1400,1500,1600,1800,1900,1900,8200,6,1300.0,1286.6996692731627,0.6390180005837465,1894.6711440003128,126.66
+1719476252,500,,Aggregated,177.700000,0.100000,1300,1300,1300,1300,1300,1400,1500,1600,1800,1900,1900,8378,6,1300.0,1286.2550968732314,0.6390180005837465,1894.6711440003128,126.66221055144426
+1719476253,500,,Aggregated,182.200000,0.100000,1300,1300,1300,1300,1300,1400,1500,1600,1800,1900,1900,8570,6,1300.0,1285.715607680039,0.6390180005837465,1894.6711440003128,126.66394399066512
+1719476254,500,,Aggregated,179.800000,0.100000,1300,1300,1300,1300,1300,1400,1500,1600,1800,1900,1900,8750,6,1300.0,1285.2174617211383,0.6390180005837465,1894.6711440003128,126.6648
+1719476255,500,,Aggregated,179.700000,0.000000,1300,1300,1300,1300,1300,1400,1500,1600,1800,1900,1900,8928,6,1300.0,1284.7858822354356,0.6390180005837465,1894.6711440003128,126.66677867383513
+1719476256,500,,Aggregated,182.100000,0.000000,1300,1300,1300,1300,1300,1400,1500,1600,1800,1900,1900,9101,6,1300.0,1284.4338008497941,0.6390180005837465,1894.6711440003128,126.66827821118558
diff --git a/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_async_report_u500_h50.html b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_async_report_u500_h50.html
new file mode 100644
index 00000000000..2ce3473e8f7
--- /dev/null
+++ b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_async_report_u500_h50.html
@@ -0,0 +1,277 @@
+
+
+
+
+
+
+
+
+ Locust
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_async_report_u500_h50_exceptions.csv b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_async_report_u500_h50_exceptions.csv
new file mode 100644
index 00000000000..bbacb5de0fd
--- /dev/null
+++ b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_async_report_u500_h50_exceptions.csv
@@ -0,0 +1,13 @@
+Count,Message,Traceback,Nodes
+5,Expecting value: line 1 column 1 (char 0)," File ""/opt/venv/lib/python3.11/site-packages/locust/user/task.py"", line 340, in run
+ self.execute_next_task()
+ File ""/opt/venv/lib/python3.11/site-packages/locust/user/task.py"", line 373, in execute_next_task
+ self.execute_task(self._task_queue.popleft())
+ File ""/opt/venv/lib/python3.11/site-packages/locust/user/task.py"", line 385, in execute_task
+ task(self)
+ File ""/mnt/locust/pf_locustfile.py"", line 8, in test_endpoint
+ print(response.status_code, response.elapsed.total_seconds(), response.json())
+ ^^^^^^^^^^^^^^^
+ File ""/opt/venv/lib/python3.11/site-packages/requests/models.py"", line 978, in json
+ raise RequestsJSONDecodeError(e.msg, e.doc, e.pos)
+",local
diff --git a/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_async_report_u500_h50_failures.csv b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_async_report_u500_h50_failures.csv
new file mode 100644
index 00000000000..d289a271567
--- /dev/null
+++ b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_async_report_u500_h50_failures.csv
@@ -0,0 +1,3 @@
+Method,Name,Error,Occurrences
+POST,//score,"ConnectionResetError(104, 'Connection reset by peer')",3
+POST,//score,RemoteDisconnected('Remote end closed connection without response'),2
diff --git a/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_async_report_u500_h50_stats.csv b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_async_report_u500_h50_stats.csv
new file mode 100644
index 00000000000..b5f53cd905e
--- /dev/null
+++ b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_async_report_u500_h50_stats.csv
@@ -0,0 +1,3 @@
+Type,Name,Request Count,Failure Count,Median Response Time,Average Response Time,Min Response Time,Max Response Time,Average Content Size,Requests/s,Failures/s,50%,66%,75%,80%,90%,95%,98%,99%,99.9%,99.99%,100%
+POST,//score,9816,5,1300.0,1286.097093404957,1.1461390004114946,1674.347330999808,136.6794009779951,166.23424865808806,0.08467514703447843,1300,1300,1300,1300,1300,1400,1500,1500,1600,1700,1700
+,Aggregated,9816,5,1300.0,1286.097093404957,1.1461390004114946,1674.347330999808,136.6794009779951,166.23424865808806,0.08467514703447843,1300,1300,1300,1300,1300,1400,1500,1500,1600,1700,1700
diff --git a/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_async_report_u500_h50_stats_history.csv b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_async_report_u500_h50_stats_history.csv
new file mode 100644
index 00000000000..2c3befce5fa
--- /dev/null
+++ b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_async_report_u500_h50_stats_history.csv
@@ -0,0 +1,57 @@
+Timestamp,User Count,Type,Name,Requests/s,Failures/s,50%,66%,75%,80%,90%,95%,98%,99%,99.9%,99.99%,100%,Total Request Count,Total Failure Count,Total Median Response Time,Total Average Response Time,Total Min Response Time,Total Max Response Time,Total Average Content Size
+1719476527,0,,Aggregated,0.000000,0.000000,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0.0,0,0,0
+1719476528,50,,Aggregated,0.000000,0.000000,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0.0,0,0,0
+1719476529,100,,Aggregated,0.000000,0.000000,1400,1400,1500,1500,1500,1500,1500,1500,1500,1500,1500,50,0,1400.0,1412.6532768400464,1323.9506239997354,1512.5098160006019,136.86
+1719476530,150,,Aggregated,0.000000,0.000000,1400,1500,1500,1500,1500,1500,1500,1500,1500,1500,1500,100,0,1400.0,1429.0261966800422,1323.9506239997354,1512.5098160006019,136.74
+1719476531,200,,Aggregated,25.000000,0.000000,1400,1500,1500,1500,1500,1500,1500,1500,1500,1500,1500,169,0,1400.0,1417.3600108580217,1260.8454840001286,1512.5098160006019,136.7337278106509
+1719476532,250,,Aggregated,33.333333,0.000000,1400,1400,1500,1500,1500,1500,1500,1500,1500,1500,1500,261,0,1400.0,1385.5100273793394,1259.4001380002737,1512.5098160006019,136.73946360153258
+1719476533,300,,Aggregated,37.500000,0.000000,1400,1400,1400,1500,1500,1500,1500,1600,1600,1600,1600,359,0,1400.0,1376.6234442980651,1259.4001380002737,1569.597996000084,136.75487465181058
+1719476534,350,,Aggregated,50.000000,0.000000,1400,1400,1400,1400,1500,1500,1500,1500,1600,1600,1600,507,0,1400.0,1360.2018513254604,1259.4001380002737,1569.597996000084,136.7534516765286
+1719476535,400,,Aggregated,56.833333,0.000000,1400,1400,1400,1400,1500,1500,1500,1500,1600,1600,1600,631,0,1400.0,1354.5559720111007,1259.4001380002737,1569.597996000084,136.7353407290016
+1719476536,450,,Aggregated,65.000000,0.000000,1300,1400,1400,1400,1500,1500,1500,1500,1600,1600,1600,783,2,1300.0,1343.6711165223435,1.3990449997436372,1569.597996000084,136.38952745849298
+1719476537,500,,Aggregated,75.500000,0.000000,1300,1400,1400,1400,1500,1500,1500,1600,1600,1600,1600,991,2,1300.0,1341.693414632692,1.3990449997436372,1632.0277519998854,136.4561049445005
+1719476538,500,,Aggregated,82.333333,0.111111,1300,1400,1400,1400,1500,1500,1600,1600,1700,1700,1700,1164,2,1300.0,1346.3686839158013,1.3990449997436372,1674.347330999808,136.49656357388315
+1719476539,500,,Aggregated,91.100000,0.200000,1300,1400,1400,1400,1500,1500,1600,1600,1700,1700,1700,1321,2,1300.0,1340.821318617711,1.3990449997436372,1674.347330999808,136.52460257380773
+1719476540,500,,Aggregated,109.300000,0.200000,1300,1400,1400,1400,1500,1500,1600,1600,1700,1700,1700,1525,2,1300.0,1333.67690547213,1.3990449997436372,1674.347330999808,136.5639344262295
+1719476541,500,,Aggregated,122.300000,0.200000,1300,1300,1400,1400,1500,1500,1600,1600,1700,1700,1700,1715,2,1300.0,1328.141723218079,1.3990449997436372,1674.347330999808,136.58483965014577
+1719476542,500,,Aggregated,135.000000,0.200000,1300,1300,1400,1400,1400,1500,1600,1600,1700,1700,1700,1870,2,1300.0,1323.9669994588292,1.3990449997436372,1674.347330999808,136.59679144385026
+1719476543,500,,Aggregated,148.600000,0.200000,1300,1300,1400,1400,1400,1500,1500,1600,1700,1700,1700,2074,2,1300.0,1318.9810442483229,1.3990449997436372,1674.347330999808,136.6142719382835
+1719476544,500,,Aggregated,155.700000,0.200000,1300,1300,1300,1400,1400,1500,1500,1600,1700,1700,1700,2243,2,1300.0,1316.0796034748148,1.3990449997436372,1674.347330999808,136.62594739188586
+1719476545,500,,Aggregated,164.900000,0.200000,1300,1300,1300,1400,1400,1500,1500,1600,1700,1700,1700,2414,3,1300.0,1312.3403666400195,1.3990449997436372,1674.347330999808,136.57953603976802
+1719476546,500,,Aggregated,172.100000,0.200000,1300,1300,1300,1400,1400,1500,1500,1600,1700,1700,1700,2614,3,1300.0,1309.6173946377207,1.3990449997436372,1674.347330999808,136.5948737566947
+1719476547,500,,Aggregated,173.700000,0.300000,1300,1300,1300,1400,1400,1500,1500,1600,1700,1700,1700,2780,4,1300.0,1307.2730263928072,1.1461390004114946,1674.347330999808,136.55719424460432
+1719476548,500,,Aggregated,178.400000,0.200000,1300,1300,1300,1300,1400,1500,1500,1600,1700,1700,1700,2971,4,1300.0,1305.1274756987539,1.1461390004114946,1674.347330999808,136.56647593402894
+1719476549,500,,Aggregated,180.500000,0.100000,1300,1300,1300,1300,1400,1500,1500,1600,1700,1700,1700,3159,4,1300.0,1303.1172012522945,1.1461390004114946,1674.347330999808,136.5758151313707
+1719476550,500,,Aggregated,178.400000,0.200000,1300,1300,1300,1300,1400,1500,1500,1600,1700,1700,1700,3330,4,1300.0,1301.58101120931,1.1461390004114946,1674.347330999808,136.58648648648648
+1719476551,500,,Aggregated,179.900000,0.200000,1300,1300,1300,1300,1400,1500,1500,1600,1700,1700,1700,3497,4,1300.0,1300.536398334863,1.1461390004114946,1674.347330999808,136.59050614812696
+1719476552,500,,Aggregated,181.000000,0.200000,1300,1300,1300,1300,1400,1500,1500,1600,1700,1700,1700,3703,4,1300.0,1300.0928713289265,1.1461390004114946,1674.347330999808,136.60059411288145
+1719476553,500,,Aggregated,179.700000,0.200000,1300,1300,1300,1300,1400,1400,1500,1600,1700,1700,1700,3863,4,1300.0,1298.8732321454845,1.1461390004114946,1674.347330999808,136.60730002588662
+1719476554,500,,Aggregated,180.900000,0.200000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,4042,4,1300.0,1297.8513148810032,1.1461390004114946,1674.347330999808,136.61207323107374
+1719476555,500,,Aggregated,179.900000,0.200000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,4238,4,1300.0,1297.458102550736,1.1461390004114946,1674.347330999808,136.61915998112318
+1719476556,500,,Aggregated,179.200000,0.200000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,4408,4,1300.0,1296.3384855387958,1.1461390004114946,1674.347330999808,136.62590744101632
+1719476557,500,,Aggregated,182.300000,0.100000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,4600,4,1300.0,1295.1923032141324,1.1461390004114946,1674.347330999808,136.63086956521738
+1719476558,500,,Aggregated,179.700000,0.100000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,4770,4,1300.0,1294.4739444278864,1.1461390004114946,1674.347330999808,136.63438155136268
+1719476559,500,,Aggregated,180.000000,0.100000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,4953,4,1300.0,1293.7796975976216,1.1461390004114946,1674.347330999808,136.6390066626287
+1719476560,500,,Aggregated,182.000000,0.000000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,5130,4,1300.0,1293.1235935323618,1.1461390004114946,1674.347330999808,136.64463937621832
+1719476561,500,,Aggregated,179.300000,0.000000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,5301,4,1300.0,1292.4971668168303,1.1461390004114946,1674.347330999808,136.64648179588758
+1719476562,500,,Aggregated,179.500000,0.000000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,5501,4,1300.0,1291.8662140220001,1.1461390004114946,1674.347330999808,136.65006362479548
+1719476563,500,,Aggregated,179.800000,0.000000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,5677,4,1300.0,1291.493166841469,1.1461390004114946,1674.347330999808,136.65474722564736
+1719476564,500,,Aggregated,177.600000,0.000000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,5841,4,1300.0,1290.9850203946266,1.1461390004114946,1674.347330999808,136.6577640814929
+1719476565,500,,Aggregated,180.600000,0.000000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,6030,4,1300.0,1290.5590458154259,1.1461390004114946,1674.347330999808,136.6608623548922
+1719476566,500,,Aggregated,179.700000,0.000000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,6216,4,1300.0,1290.2609338626144,1.1461390004114946,1674.347330999808,136.66457528957528
+1719476567,500,,Aggregated,179.000000,0.000000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,6391,4,1300.0,1290.1224028873423,1.1461390004114946,1674.347330999808,136.66671882334532
+1719476568,500,,Aggregated,181.400000,0.000000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,6571,4,1300.0,1290.2679250707683,1.1461390004114946,1674.347330999808,136.6691523360219
+1719476569,500,,Aggregated,178.100000,0.000000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,6747,4,1300.0,1290.2910753093265,1.1461390004114946,1674.347330999808,136.6715577293612
+1719476570,500,,Aggregated,179.600000,0.000000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,6928,4,1300.0,1289.9082478480123,1.1461390004114946,1674.347330999808,136.6743648960739
+1719476571,500,,Aggregated,180.900000,0.000000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,7104,4,1300.0,1289.4153141610434,1.1461390004114946,1674.347330999808,136.6748310810811
+1719476572,500,,Aggregated,179.000000,0.000000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,7289,4,1300.0,1289.0585187379693,1.1461390004114946,1674.347330999808,136.67540128961448
+1719476573,500,,Aggregated,179.500000,0.000000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,7471,4,1300.0,1288.8968930548879,1.1461390004114946,1674.347330999808,136.67929326730024
+1719476574,500,,Aggregated,181.600000,0.000000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,7657,4,1300.0,1288.6585314525373,1.1461390004114946,1674.347330999808,136.67911714770798
+1719476575,500,,Aggregated,178.500000,0.000000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,7825,4,1300.0,1288.303429618539,1.1461390004114946,1674.347330999808,136.67974440894568
+1719476576,500,,Aggregated,179.300000,0.000000,1300,1300,1300,1300,1400,1400,1500,1500,1700,1700,1700,8003,4,1300.0,1287.959085470332,1.1461390004114946,1674.347330999808,136.68249406472572
+1719476577,500,,Aggregated,178.800000,0.000000,1300,1300,1300,1300,1300,1400,1500,1500,1700,1700,1700,8191,4,1300.0,1287.7642972516255,1.1461390004114946,1674.347330999808,136.68526431449152
+1719476578,500,,Aggregated,178.800000,0.000000,1300,1300,1300,1300,1300,1400,1500,1500,1700,1700,1700,8368,4,1300.0,1287.3661344972588,1.1461390004114946,1674.347330999808,136.68702198852773
+1719476579,500,,Aggregated,182.100000,0.000000,1300,1300,1300,1300,1300,1400,1500,1500,1700,1700,1700,8555,4,1300.0,1286.9558120748175,1.1461390004114946,1674.347330999808,136.6876680303916
+1719476580,500,,Aggregated,178.900000,0.000000,1300,1300,1300,1300,1300,1400,1500,1500,1700,1700,1700,8742,4,1300.0,1286.869790533868,1.1461390004114946,1674.347330999808,136.68817204301075
+1719476581,500,,Aggregated,179.600000,0.000000,1300,1300,1300,1300,1300,1400,1500,1500,1700,1700,1700,8917,4,1300.0,1287.1406386806186,1.1461390004114946,1674.347330999808,136.68902097117865
+1719476582,500,,Aggregated,180.000000,0.000000,1300,1300,1300,1300,1300,1400,1500,1500,1600,1700,1700,9085,4,1300.0,1287.0474121263699,1.1461390004114946,1674.347330999808,136.6905888827738
diff --git a/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_sync_report_u500_h50.html b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_sync_report_u500_h50.html
new file mode 100644
index 00000000000..9364398add8
--- /dev/null
+++ b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_sync_report_u500_h50.html
@@ -0,0 +1,277 @@
+
+
+
+
+
+
+
+
+ Locust
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_sync_report_u500_h50_exceptions.csv b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_sync_report_u500_h50_exceptions.csv
new file mode 100644
index 00000000000..1b049bbbce5
--- /dev/null
+++ b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_sync_report_u500_h50_exceptions.csv
@@ -0,0 +1,13 @@
+Count,Message,Traceback,Nodes
+1,Expecting value: line 1 column 1 (char 0)," File ""/opt/venv/lib/python3.11/site-packages/locust/user/task.py"", line 340, in run
+ self.execute_next_task()
+ File ""/opt/venv/lib/python3.11/site-packages/locust/user/task.py"", line 373, in execute_next_task
+ self.execute_task(self._task_queue.popleft())
+ File ""/opt/venv/lib/python3.11/site-packages/locust/user/task.py"", line 385, in execute_task
+ task(self)
+ File ""/mnt/locust/pf_locustfile.py"", line 8, in test_endpoint
+ print(response.status_code, response.elapsed.total_seconds(), response.json())
+ ^^^^^^^^^^^^^^^
+ File ""/opt/venv/lib/python3.11/site-packages/requests/models.py"", line 978, in json
+ raise RequestsJSONDecodeError(e.msg, e.doc, e.pos)
+",local
diff --git a/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_sync_report_u500_h50_failures.csv b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_sync_report_u500_h50_failures.csv
new file mode 100644
index 00000000000..8846989fc03
--- /dev/null
+++ b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_sync_report_u500_h50_failures.csv
@@ -0,0 +1,2 @@
+Method,Name,Error,Occurrences
+POST,//score,RemoteDisconnected('Remote end closed connection without response'),1
diff --git a/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_sync_report_u500_h50_stats.csv b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_sync_report_u500_h50_stats.csv
new file mode 100644
index 00000000000..461f7646d62
--- /dev/null
+++ b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_sync_report_u500_h50_stats.csv
@@ -0,0 +1,3 @@
+Type,Name,Request Count,Failure Count,Median Response Time,Average Response Time,Min Response Time,Max Response Time,Average Content Size,Requests/s,Failures/s,50%,66%,75%,80%,90%,95%,98%,99%,99.9%,99.99%,100%
+POST,//score,2432,1,5800.0,8065.454250779605,1.944665999872086,25289.052397999967,136.6953125,41.17208026926904,0.016929309321245492,5800,10000,14000,15000,21000,25000,25000,25000,25000,25000,25000
+,Aggregated,2432,1,5800.0,8065.454250779605,1.944665999872086,25289.052397999967,136.6953125,41.17208026926904,0.016929309321245492,5800,10000,14000,15000,21000,25000,25000,25000,25000,25000,25000
diff --git a/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_sync_report_u500_h50_stats_history.csv b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_sync_report_u500_h50_stats_history.csv
new file mode 100644
index 00000000000..1118d9b8999
--- /dev/null
+++ b/benchmark/promptflow-serve/result-archive/v1.12.0/pf_static_sync_report_u500_h50_stats_history.csv
@@ -0,0 +1,57 @@
+Timestamp,User Count,Type,Name,Requests/s,Failures/s,50%,66%,75%,80%,90%,95%,98%,99%,99.9%,99.99%,100%,Total Request Count,Total Failure Count,Total Median Response Time,Total Average Response Time,Total Min Response Time,Total Max Response Time,Total Average Content Size
+1719409747,0,,Aggregated,0.000000,0.000000,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0.0,0,0,0
+1719409748,50,,Aggregated,0.000000,0.000000,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,0,0,0,0.0,0,0,0
+1719409749,100,,Aggregated,0.000000,0.000000,1300,1300,1400,1400,1400,1400,1400,1400,1400,1400,1400,50,0,1308.3005599996795,1337.7559173799455,1308.3005599996795,1418.4093369999573,136.78
+1719409750,150,,Aggregated,0.000000,0.000000,1400,1400,1600,1600,1600,1600,1700,1700,1700,1700,1700,72,0,1400.0,1411.4304759166314,1308.3005599996795,1675.593490999745,136.76388888888889
+1719409751,200,,Aggregated,25.000000,0.000000,1400,1600,1600,1700,2800,2900,2900,2900,2900,2900,2900,115,0,1400.0,1614.0109206521506,1261.7988229999355,2941.5734620001786,136.7826086956522
+1719409752,250,,Aggregated,24.000000,0.000000,1400,1600,1600,1800,2800,2900,2900,2900,2900,2900,2900,137,0,1400.0,1633.0232634379372,1261.7988229999355,2941.5734620001786,136.8029197080292
+1719409753,300,,Aggregated,23.000000,0.000000,1500,1600,2400,2800,3100,3200,4200,4200,4200,4200,4200,162,0,1400.0,1868.995506518497,1261.7988229999355,4205.764570000156,136.7962962962963
+1719409754,350,,Aggregated,27.000000,0.000000,1500,1900,2800,2900,4100,4400,5400,5400,5400,5400,5400,205,0,1500.0,2076.0464092633856,1261.7988229999355,5418.845298999713,136.77560975609757
+1719409755,400,,Aggregated,27.000000,0.000000,1600,2600,3100,3400,4600,5400,5700,5700,5700,5700,5700,246,0,1600.0,2331.3119405934654,1261.3538949999565,5724.73182099975,136.78048780487805
+1719409756,450,,Aggregated,28.714286,0.000000,1600,2800,3400,4300,5400,5700,5900,6100,6100,6100,6100,292,0,1600.0,2517.296600962308,1261.3538949999565,6145.5700599999545,136.7705479452055
+1719409757,500,,Aggregated,30.750000,0.000000,1600,2800,3200,4200,5400,5700,6000,6100,6200,6200,6200,325,0,1600.0,2496.6932029107534,1261.3538949999565,6196.597149999889,136.75076923076924
+1719409758,500,,Aggregated,30.333333,0.000000,1600,2900,4200,4600,5800,6100,7200,7200,7300,7300,7300,366,0,1600.0,2692.2256483005317,1261.0177900000963,7284.156900000198,136.7486338797814
+1719409759,500,,Aggregated,30.333333,0.000000,1600,3100,4400,5400,6100,7200,7500,8500,8500,8500,8500,410,0,1600.0,2935.3733606341416,1261.0177900000963,8549.785410999903,136.7560975609756
+1719409760,500,,Aggregated,36.300000,0.000000,1700,3600,4900,5700,7200,8400,8700,9700,9800,9800,9800,465,0,1700.0,3170.828777591395,1261.0177900000963,9767.886062999878,136.7720430107527
+1719409761,500,,Aggregated,36.000000,0.000000,1700,4100,5000,5700,7300,8500,9600,9700,11000,11000,11000,501,0,1700.0,3264.302366730534,1261.0177900000963,10655.230038999889,136.7684630738523
+1719409762,500,,Aggregated,38.700000,0.000000,1700,4200,5400,5900,7500,8700,9700,10000,11000,11000,11000,536,0,1700.0,3396.252104031713,1261.0177900000963,10655.230038999889,136.7705223880597
+1719409763,500,,Aggregated,39.600000,0.000000,1800,4300,5700,6000,8500,9700,10000,11000,11000,11000,11000,584,0,1700.0,3587.6369061404084,1261.0177900000963,11381.801426000038,136.763698630137
+1719409764,500,,Aggregated,39.300000,0.000000,1900,4600,5900,6400,9000,10000,11000,13000,13000,13000,13000,631,0,1900.0,3853.1629661727407,1261.0177900000963,12648.173237000265,136.76386687797148
+1719409765,500,,Aggregated,42.100000,0.000000,2100,5100,6100,7300,9700,11000,13000,13000,14000,14000,14000,680,1,2000.0,4107.954662763233,1.944665999872086,13822.554110000056,136.55735294117648
+1719409766,500,,Aggregated,42.700000,0.000000,2000,5400,6200,7300,10000,11000,13000,13000,14000,14000,14000,714,1,2000.0,4162.236196249298,1.944665999872086,13822.554110000056,136.56442577030813
+1719409767,500,,Aggregated,42.600000,0.100000,2100,5600,6200,7500,10000,12000,13000,14000,14000,14000,14000,757,1,2100.0,4290.79328791017,1.944665999872086,14256.511758000215,136.57199471598415
+1719409768,500,,Aggregated,43.000000,0.100000,2500,5700,6400,8400,11000,13000,14000,15000,16000,16000,16000,796,1,2400.0,4511.933391639445,1.944665999872086,15521.435124000163,136.58165829145727
+1719409769,500,,Aggregated,42.700000,0.100000,2700,5900,7200,8700,12000,14000,15000,16000,16000,16000,16000,849,1,2700.0,4732.106024339221,1.944665999872086,16448.77474399982,136.59010600706713
+1719409770,500,,Aggregated,42.900000,0.100000,2800,6000,7400,9100,13000,14000,16000,16000,17000,17000,17000,894,1,2800.0,4946.710819477626,1.944665999872086,16954.83104499999,136.59507829977628
+1719409771,500,,Aggregated,43.700000,0.100000,2800,6000,7400,9200,13000,14000,16000,16000,17000,17000,17000,921,1,2800.0,4972.649967875132,1.944665999872086,17073.878724000224,136.59934853420197
+1719409772,500,,Aggregated,42.100000,0.100000,2800,6000,7500,9600,13000,15000,16000,17000,18000,18000,18000,971,1,2800.0,5131.706580979398,1.944665999872086,18335.012050000387,136.6076210092688
+1719409773,500,,Aggregated,42.700000,0.100000,2900,6100,8500,10000,14000,16000,17000,18000,19000,19000,19000,1016,1,2900.0,5331.196593924207,1.944665999872086,19402.754682999785,136.61515748031496
+1719409774,500,,Aggregated,43.600000,0.100000,2900,6100,8700,10000,14000,16000,19000,19000,20000,20000,20000,1065,1,2900.0,5544.836847229102,1.944665999872086,19885.385042000053,136.618779342723
+1719409775,500,,Aggregated,43.200000,0.100000,3100,6200,9000,11000,14000,17000,19000,20000,21000,21000,21000,1110,1,3100.0,5728.511061962157,1.944665999872086,20998.376543000177,136.61621621621623
+1719409776,500,,Aggregated,42.800000,0.100000,3100,6200,9000,11000,14000,17000,19000,20000,21000,21000,21000,1142,1,3000.0,5714.019045704025,1.944665999872086,21144.795220000105,136.61821366024517
+1719409777,500,,Aggregated,41.900000,0.000000,3100,6200,9500,11000,15000,18000,20000,21000,21000,21000,21000,1184,1,3100.0,5890.661290177362,1.944665999872086,21418.2882719997,136.625
+1719409778,500,,Aggregated,41.900000,0.000000,3200,6200,9700,12000,15000,19000,21000,21000,22000,23000,23000,1230,1,3200.0,6075.928582534142,1.944665999872086,22582.99703600005,136.62926829268292
+1719409779,500,,Aggregated,43.400000,0.000000,3400,6300,10000,13000,16000,19000,21000,22000,23000,23000,23000,1284,1,3400.0,6239.658960612922,1.944665999872086,23195.42864799996,136.6355140186916
+1719409780,500,,Aggregated,43.800000,0.000000,3700,6300,10000,13000,16000,20000,21000,22000,24000,24000,24000,1323,1,3700.0,6360.389593028716,1.944665999872086,24014.85227100011,136.63643235071805
+1719409781,500,,Aggregated,43.100000,0.000000,3700,6300,10000,13000,16000,20000,21000,23000,24000,24000,24000,1356,1,3600.0,6386.859659309728,1.944665999872086,24217.959419999715,136.64085545722713
+1719409782,500,,Aggregated,42.600000,0.000000,3800,6400,11000,13000,17000,20000,22000,24000,25000,25000,25000,1404,1,3700.0,6514.713524649568,1.944665999872086,25078.24636000032,136.64245014245014
+1719409783,500,,Aggregated,43.300000,0.000000,4200,6500,11000,14000,17000,21000,23000,24000,25000,25000,25000,1445,1,4200.0,6682.5960987930775,1.944665999872086,25078.24636000032,136.6477508650519
+1719409784,500,,Aggregated,43.300000,0.000000,4200,6600,12000,14000,19000,21000,24000,25000,25000,25000,25000,1498,1,4200.0,6813.298066919225,1.944665999872086,25097.526537999784,136.65086782376503
+1719409785,500,,Aggregated,43.000000,0.000000,4200,6600,12000,14000,19000,21000,24000,25000,25000,25000,25000,1534,1,4200.0,6824.073270511734,1.944665999872086,25097.526537999784,136.65514993481096
+1719409786,500,,Aggregated,43.000000,0.000000,4300,6600,12000,14000,19000,21000,24000,25000,25000,25000,25000,1566,1,4200.0,6907.866205554917,1.944665999872086,25097.526537999784,136.6551724137931
+1719409787,500,,Aggregated,43.200000,0.000000,4300,7200,13000,14000,19000,21000,24000,25000,25000,25000,25000,1617,1,4300.0,7010.403652472478,1.944665999872086,25097.526537999784,136.65924551638838
+1719409788,500,,Aggregated,42.800000,0.000000,4400,7300,13000,14000,20000,21000,25000,25000,25000,25000,25000,1665,1,4400.0,7127.453349181381,1.944665999872086,25129.52445100018,136.66186186186187
+1719409789,500,,Aggregated,43.000000,0.000000,4600,7400,13000,14000,20000,21000,25000,25000,25000,25000,25000,1712,1,4500.0,7222.519261254086,1.944665999872086,25289.052397999967,136.66471962616822
+1719409790,500,,Aggregated,43.000000,0.000000,4500,7400,13000,14000,20000,21000,25000,25000,25000,25000,25000,1748,1,4500.0,7236.843624127571,1.944665999872086,25289.052397999967,136.66704805491992
+1719409791,500,,Aggregated,41.400000,0.000000,4600,7500,14000,14000,20000,21000,25000,25000,25000,25000,25000,1782,1,4600.0,7300.47649954265,1.944665999872086,25289.052397999967,136.66778900112234
+1719409792,500,,Aggregated,43.100000,0.000000,4700,7700,14000,14000,20000,22000,25000,25000,25000,25000,25000,1834,1,4600.0,7374.7316095136375,1.944665999872086,25289.052397999967,136.67121046892038
+1719409793,500,,Aggregated,43.100000,0.000000,5000,8400,14000,14000,20000,23000,25000,25000,25000,25000,25000,1881,1,5000.0,7478.029888249343,1.944665999872086,25289.052397999967,136.67145135566187
+1719409794,500,,Aggregated,43.000000,0.000000,5300,8500,14000,14000,20000,23000,25000,25000,25000,25000,25000,1928,1,5300.0,7552.649418308615,1.944665999872086,25289.052397999967,136.67427385892117
+1719409795,500,,Aggregated,43.500000,0.000000,5100,8500,14000,14000,20000,23000,25000,25000,25000,25000,25000,1962,1,5100.0,7544.475382013767,1.944665999872086,25289.052397999967,136.6763506625892
+1719409796,500,,Aggregated,42.100000,0.000000,5400,8700,14000,14000,20000,24000,25000,25000,25000,25000,25000,1998,1,5300.0,7609.947784719727,1.944665999872086,25289.052397999967,136.6791791791792
+1719409797,500,,Aggregated,42.600000,0.000000,5400,8700,14000,14000,20000,24000,25000,25000,25000,25000,25000,2048,1,5400.0,7677.0136849375085,1.944665999872086,25289.052397999967,136.6845703125
+1719409798,500,,Aggregated,43.300000,0.000000,5600,9000,14000,14000,21000,24000,25000,25000,25000,25000,25000,2100,1,5600.0,7748.132748470485,1.944665999872086,25289.052397999967,136.6852380952381
+1719409799,500,,Aggregated,42.900000,0.000000,5700,9100,14000,14000,21000,24000,25000,25000,25000,25000,25000,2140,1,5700.0,7803.660731943934,1.944665999872086,25289.052397999967,136.68644859813085
+1719409800,500,,Aggregated,42.900000,0.000000,5700,9000,14000,14000,21000,24000,25000,25000,25000,25000,25000,2174,1,5700.0,7794.175740589705,1.944665999872086,25289.052397999967,136.68583256669734
+1719409801,500,,Aggregated,43.100000,0.000000,5700,9400,14000,14000,21000,24000,25000,25000,25000,25000,25000,2216,1,5700.0,7856.570955070403,1.944665999872086,25289.052397999967,136.68682310469313
+1719409802,500,,Aggregated,42.400000,0.000000,5700,9600,14000,14000,21000,24000,25000,25000,25000,25000,25000,2260,1,5700.0,7930.892839802662,1.944665999872086,25289.052397999967,136.68893805309733
diff --git a/benchmark/promptflow-serve/test_runner/build.sh b/benchmark/promptflow-serve/test_runner/build.sh
new file mode 100755
index 00000000000..a012882c51c
--- /dev/null
+++ b/benchmark/promptflow-serve/test_runner/build.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+set -eux
+# Builds the mock API image and the three PromptFlow flow images used by the benchmark.
+build_service() {
+    local service_dir=$1   # directory containing the flow to package
+    local image_name=$2    # tag given to the resulting Docker image
+    # Always rebuild from an empty ./build so stale artifacts never reach the image.
+    cd "$service_dir"
+    rm -rf ./build
+    pf flow build --source . --output ./build --format docker
+    cd ./build
+    docker build -t "$image_name" .
+}
+
+# base directory: resolved from this script's location so any working directory works
+BASE_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+# build the async mock back-end api service
+cd "$BASE_DIR/../mock_api"
+docker build -t fastapi-wait-service .
+
+# build the static DAG async service
+build_service "$BASE_DIR/../pf_flows/static_async" "pf-static-async-service"
+
+# build the static DAG sync service
+build_service "$BASE_DIR/../pf_flows/static_sync" "pf-static-sync-service"
+
+# build the flexflow async service
+build_service "$BASE_DIR/../pf_flows/flex_async" "pf-flex-async-service"
\ No newline at end of file
diff --git a/benchmark/promptflow-serve/test_runner/docker-compose.yml b/benchmark/promptflow-serve/test_runner/docker-compose.yml
new file mode 100644
index 00000000000..67ec7299b44
--- /dev/null
+++ b/benchmark/promptflow-serve/test_runner/docker-compose.yml
@@ -0,0 +1,62 @@
+version: '3.8'
+
+# See this on how to configure the worker and thread count:
+# https://learn.microsoft.com/en-us/azure/machine-learning/prompt-flow/how-to-deploy-to-code?view=azureml-api-2&tabs=managed#configure-concurrency-for-deployment
+
+services:
+  fastapi-wait-service:
+    image: fastapi-wait-service
+    container_name: fastapi-wait-service
+    ports:
+      - "50001:50001"
+    environment:
+      - MIN_WAIT_TIME_SEC=1
+      - MAX_WAIT_TIME_SEC=1
+    networks:
+      - my_network
+
+  # flexflow async
+  pf-flex-async-service:
+    image: pf-flex-async-service
+    container_name: pf-flex-async-service
+    ports:
+      - "8083:8080"
+    environment:
+      - PROMPTFLOW_WORKER_NUM=${PROMPTFLOW_WORKER_NUM}
+      - PROMPTFLOW_WORKER_THREADS=${PROMPTFLOW_WORKER_THREADS}
+      - PROMPTFLOW_SERVING_ENGINE=fastapi
+      - MOCK_API_ENDPOINT=http://fastapi-wait-service:50001
+    networks:
+      - my_network
+
+  # static dag async
+  pf-static-async-service:
+    image: pf-static-async-service
+    container_name: pf-static-async-service
+    ports:
+      - "8081:8080"
+    environment:
+      - PROMPTFLOW_WORKER_NUM=${PROMPTFLOW_WORKER_NUM}
+      - PROMPTFLOW_WORKER_THREADS=${PROMPTFLOW_WORKER_THREADS}
+      - PROMPTFLOW_SERVING_ENGINE=fastapi
+      - MOCK_API_ENDPOINT=http://fastapi-wait-service:50001
+    networks:
+      - my_network
+
+  # static dag sync
+  pf-static-sync-service:
+    image: pf-static-sync-service
+    container_name: pf-static-sync-service
+    ports:
+      - "8082:8080"
+    environment:
+      - PROMPTFLOW_WORKER_NUM=${PROMPTFLOW_WORKER_NUM}
+      - PROMPTFLOW_WORKER_THREADS=${PROMPTFLOW_WORKER_THREADS}
+      - PROMPTFLOW_SERVING_ENGINE=flask
+      - MOCK_API_ENDPOINT=http://fastapi-wait-service:50001
+    networks:
+      - my_network
+
+networks:
+  my_network:
+    driver: bridge
diff --git a/benchmark/promptflow-serve/test_runner/mock_locustfile.py b/benchmark/promptflow-serve/test_runner/mock_locustfile.py
new file mode 100644
index 00000000000..498b20d1f29
--- /dev/null
+++ b/benchmark/promptflow-serve/test_runner/mock_locustfile.py
@@ -0,0 +1,13 @@
+from locust import HttpUser, TaskSet, between, task
+
+
+class UserBehavior(TaskSet):
+ @task
+ def test_endpoint(self):
+ response = self.client.get("/")
+ print(response.status_code, response.elapsed.total_seconds())
+
+
+class WebsiteUser(HttpUser):
+ tasks = [UserBehavior]
+ wait_time = between(1, 2)
diff --git a/benchmark/promptflow-serve/test_runner/pf_locustfile.py b/benchmark/promptflow-serve/test_runner/pf_locustfile.py
new file mode 100644
index 00000000000..43df355c9dd
--- /dev/null
+++ b/benchmark/promptflow-serve/test_runner/pf_locustfile.py
@@ -0,0 +1,13 @@
+from locust import HttpUser, TaskSet, between, task
+
+
+class UserBehavior(TaskSet):
+ @task
+ def test_endpoint(self):
+ response = self.client.post("/score", json={"question": "Test question", "chat_history": []})
+ print(response.status_code, response.elapsed.total_seconds(), response.json())
+
+
+class WebsiteUser(HttpUser):
+ tasks = [UserBehavior]
+ wait_time = between(1, 2)
diff --git a/benchmark/promptflow-serve/test_runner/run_locust.sh b/benchmark/promptflow-serve/test_runner/run_locust.sh
new file mode 100755
index 00000000000..0f1cd2773ed
--- /dev/null
+++ b/benchmark/promptflow-serve/test_runner/run_locust.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+# Runs a headless Locust test in Docker; expansions are quoted so paths with spaces survive.
+# List of required environment variables
+required_vars=(
+    HOST_PROJECT_PATH
+    LOCUST_FILE
+    TARGET_HOST
+    USERS
+    HATCH_RATE
+    RUN_TIME
+    TARGET_TYPE
+)
+
+# Check if all required environment variables are set and not empty
+for var in "${required_vars[@]}"; do
+    if [ -z "${!var}" ]; then
+        echo "Error: Environment variable $var is not set or is empty." >&2
+        exit 1
+    fi
+done
+
+docker run --rm -it \
+    --network=host \
+    -v "$HOST_PROJECT_PATH/benchmark/promptflow-serve/test_runner:/mnt/locust" \
+    locustio/locust \
+    -f "/mnt/locust/$LOCUST_FILE" \
+    --host="$TARGET_HOST" \
+    --headless \
+    -u "$USERS" \
+    -r "$HATCH_RATE" \
+    --run-time "$RUN_TIME" \
+    --html="/mnt/locust/locust-results/${TARGET_TYPE}_report_u${USERS}_h${HATCH_RATE}.html" \
+    --csv="/mnt/locust/locust-results/${TARGET_TYPE}_report_u${USERS}_h${HATCH_RATE}" \
+    --print-stats
diff --git a/benchmark/promptflow-serve/test_runner/settings.env b/benchmark/promptflow-serve/test_runner/settings.env
new file mode 100644
index 00000000000..05bc8a8141f
--- /dev/null
+++ b/benchmark/promptflow-serve/test_runner/settings.env
@@ -0,0 +1,5 @@
+USERS=500
+HATCH_RATE=50
+RUN_TIME=60s
+PROMPTFLOW_WORKER_NUM=8
+PROMPTFLOW_WORKER_THREADS=4
diff --git a/benchmark/promptflow-serve/test_runner/test.sh b/benchmark/promptflow-serve/test_runner/test.sh
new file mode 100755
index 00000000000..79644a99f1d
--- /dev/null
+++ b/benchmark/promptflow-serve/test_runner/test.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+# Starts the services for one benchmark target, waits for warm-up, then runs Locust.
+# print usage if no test arg
+if [ -z "${1:-}" ]; then
+    echo "Usage: $0 <test>"
+    echo "Available tests: mock, staticasync, staticsync, flexasync"
+    exit 1
+fi
+cd "$(dirname "${BASH_SOURCE[0]}")" || exit 1  # settings.env, compose file, run_locust.sh live here
+# Source the .env file to export the variables
+if [ -f settings.env ]; then
+    cat settings.env
+    set -o allexport
+    source settings.env
+    set +o allexport
+else
+    echo "settings.env file not found!" >&2
+    exit 1
+fi
+
+# List of required environment variables
+required_vars=(
+    USERS
+    HATCH_RATE
+    RUN_TIME
+    PROMPTFLOW_WORKER_NUM
+    PROMPTFLOW_WORKER_THREADS
+)
+
+# Check if all required environment variables are set and not empty
+for var in "${required_vars[@]}"; do
+    if [ -z "${!var}" ]; then
+        echo "Error: Environment variable $var is not set or is empty." >&2
+        exit 1
+    fi
+done
+
+case "$1" in  # TARGET_HOST has no trailing slash: Locust appends paths that already start with "/"
+    mock)
+        export LOCUST_FILE=mock_locustfile.py
+        export TARGET_HOST=http://localhost:50001
+        export TARGET_TYPE=mock
+        ENV_PREP=(docker-compose up fastapi-wait-service -d)
+        ;;
+    staticasync)
+        export LOCUST_FILE=pf_locustfile.py
+        export TARGET_HOST=http://localhost:8081
+        export TARGET_TYPE=pf_static_async
+        ENV_PREP=(docker-compose up fastapi-wait-service pf-static-async-service -d)
+        ;;
+    staticsync)
+        export LOCUST_FILE=pf_locustfile.py
+        export TARGET_HOST=http://localhost:8082
+        export TARGET_TYPE=pf_static_sync
+        ENV_PREP=(docker-compose up fastapi-wait-service pf-static-sync-service -d)
+        ;;
+    flexasync)
+        export LOCUST_FILE=pf_locustfile.py
+        export TARGET_HOST=http://localhost:8083
+        export TARGET_TYPE=pf_flex_async
+        ENV_PREP=(docker-compose up fastapi-wait-service pf-flex-async-service -d)
+        ;;
+    *)
+        echo "Invalid endpoint. Available endpoints: mock, staticasync, staticsync, flexasync" >&2
+        exit 1
+        ;;
+esac
+
+# prepare the env, starting at least the mock service
+echo "Stopping existing services..."
+docker-compose down --remove-orphans || echo "docker-compose down encountered an error, but we're ignoring it."
+
+echo "Starting the services..."
+"${ENV_PREP[@]}" || { echo "Error: failed to start the services." >&2; exit 1; }
+
+echo "Waiting before running tests..."
+secs=$((30))
+while [ $secs -gt 0 ]; do
+    echo -ne "$secs\033[0K\r"  # redraw the countdown on the same terminal line
+    sleep 1
+    : $((secs--))
+done
+
+echo "Running Locust tests against $1 endpoint..."
+./run_locust.sh