diff --git a/.github/workflows/samples_flows_standard_maths_to_code.yml b/.github/workflows/samples_flows_standard_maths_to_code.yml
new file mode 100644
index 00000000000..d6b2c708933
--- /dev/null
+++ b/.github/workflows/samples_flows_standard_maths_to_code.yml
@@ -0,0 +1,89 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: samples_flows_standard_maths_to_code
+on:
+  schedule:
+    - cron: "39 19 * * *" # Every day starting at 3:39 BJT
+  pull_request:
+    branches: [ main ]
+    paths: [ examples/flows/standard/maths-to-code/**, examples/*requirements.txt, .github/workflows/samples_flows_standard_maths_to_code.yml ]
+  workflow_dispatch:
+
+jobs:
+  samples_readme_ci:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+      - name: Setup Python 3.9 environment
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.9"
+      - name: Generate config.json  # NOTE(review): the secret is echoed unquoted, so the shell strips unescaped quotes; confirm the stored secret is pre-escaped
+        run: echo ${{ secrets.TEST_WORKSPACE_CONFIG_JSON }} > ${{ github.workspace }}/examples/config.json
+      - name: Prepare requirements
+        working-directory: examples
+        run: |
+          if [[ -e requirements.txt ]]; then
+            python -m pip install --upgrade pip
+            pip install -r requirements.txt
+          fi
+      - name: Prepare dev requirements
+        working-directory: examples
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r dev_requirements.txt
+      - name: Refine .env file  # fills the AOAI key/endpoint placeholders in .env.example, then renames it to .env (only if the file exists)
+        working-directory: examples/flows/standard/maths-to-code
+        run: |
+          AOAI_API_KEY=${{ secrets.AOAI_API_KEY_TEST }}
+          AOAI_API_ENDPOINT=${{ secrets.AOAI_API_ENDPOINT_TEST }}
+          AOAI_API_ENDPOINT=$(echo ${AOAI_API_ENDPOINT//\//\\/})
+          if [[ -e .env.example ]]; then
+            echo "env replacement"
+            sed -i -e "s/<your_AOAI_key>/$AOAI_API_KEY/g" -e "s/<your_AOAI_endpoint>/$AOAI_API_ENDPOINT/g" .env.example
+            mv .env.example .env
+          fi
+      - name: Create run.yml  # despite the name, this only patches connection placeholders in an existing run.yml
+        working-directory: examples/flows/standard/maths-to-code
+        run: |
+          gpt_base=${{ secrets.AOAI_API_ENDPOINT_TEST }}
+          gpt_base=$(echo ${gpt_base//\//\\/})
+          if [[ -e run.yml ]]; then
+            sed -i -e "s/\${azure_open_ai_connection.api_key}/${{ secrets.AOAI_API_KEY_TEST }}/g" -e "s/\${azure_open_ai_connection.api_base}/$gpt_base/g" run.yml
+          fi
+      - name: Azure Login
+        uses: azure/login@v1
+        with:
+          creds: ${{ secrets.AZURE_CREDENTIALS }}
+      - name: Extract Steps examples/flows/standard/maths-to-code/README.md
+        working-directory: ${{ github.workspace }}
+        run: |
+          python scripts/readme/extract_steps_from_readme.py -f examples/flows/standard/maths-to-code/README.md -o examples/flows/standard/maths-to-code
+      - name: Cat script
+        working-directory: examples/flows/standard/maths-to-code
+        run: |
+          cat bash_script.sh
+      - name: Run scripts
+        working-directory: examples/flows/standard/maths-to-code
+        run: |
+          export aoai_api_key=${{secrets.AOAI_API_KEY_TEST }}
+          export aoai_api_endpoint=${{ secrets.AOAI_API_ENDPOINT_TEST }}
+          export test_workspace_sub_id=${{ secrets.TEST_WORKSPACE_SUB_ID }}
+          export test_workspace_rg=${{ secrets.TEST_WORKSPACE_RG }}
+          export test_workspace_name=${{ secrets.TEST_WORKSPACE_NAME }}
+          bash bash_script.sh
+      - name: Pip List for Debug
+        if: ${{ always() }}
+        working-directory: examples/flows/standard/maths-to-code
+        run: |
+          pip list
+      - name: Upload artifact
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v3
+        with:
+          name: artifact
+          path: examples/flows/standard/maths-to-code/bash_script.sh
\ No newline at end of file
diff --git a/examples/flows/chat/chat-with-pdf/tests/base_test.py b/examples/flows/chat/chat-with-pdf/tests/base_test.py
index 0ccbe183de1..b7612afaa66 100644
--- a/examples/flows/chat/chat-with-pdf/tests/base_test.py
+++ b/examples/flows/chat/chat-with-pdf/tests/base_test.py
@@ -110,7 +110,7 @@ def create_eval_run(
     def check_run_basics(self, run, display_name=None):
         self.assertTrue(run is not None)
         if display_name is not None:
-            self.assertEqual(run.display_name, display_name)
+            self.assertIn(display_name, run.display_name)  # substring match (not equality); reports both values on failure
         self.assertEqual(run.tags["unittest"], "true")
 
     def run_eval_with_config(
diff --git a/examples/flows/evaluation/eval-accuracy-maths-to-code/aggregate.py b/examples/flows/evaluation/eval-accuracy-maths-to-code/aggregate.py
new file mode 100644
index 00000000000..ae265856ca4
--- /dev/null
+++ b/examples/flows/evaluation/eval-accuracy-maths-to-code/aggregate.py
@@ -0,0 +1,41 @@
+from typing import List
+from promptflow import tool
+from promptflow import log_metric
+
+
+@tool
+def accuracy_aggregate(processed_results: List[int]):
+    """Aggregate per-line scores into accuracy and error-rate metrics.
+
+    A score of 1 is counted as correct and -1 as an exception; any other
+    value only contributes to the total.
+
+    :param processed_results: Per-line scores to aggregate.
+    :return: Dict with ``num_total``, ``num_correct``, ``num_exception``,
+        ``accuracy`` and ``error_rate`` (both rates rounded to 2 decimals).
+    """
+    num_total = len(processed_results)
+    num_exception = sum(1 for score in processed_results if score == -1)
+    num_correct = sum(1 for score in processed_results if score == 1)
+
+    # An empty batch has no meaningful rate; report 0.0 rather than raising
+    # ZeroDivisionError.
+    accuracy = round(num_correct / num_total, 2) if num_total else 0.0
+    error_rate = round(num_exception / num_total, 2) if num_total else 0.0
+
+    log_metric(key="accuracy", value=accuracy)
+    log_metric(key="error_rate", value=error_rate)
+
+    return {
+        "num_total": num_total,
+        "num_correct": num_correct,
+        "num_exception": num_exception,
+        "accuracy": accuracy,
+        "error_rate": error_rate,
+    }
+
+
+if __name__ == "__main__":
+    numbers = [1, 1, 1, 1, 0, -1, -1]
+    accuracy = accuracy_aggregate(numbers)
+    print("The accuracy is", accuracy)
diff --git a/examples/flows/evaluation/eval-accuracy-maths-to-code/flow.dag.yaml b/examples/flows/evaluation/eval-accuracy-maths-to-code/flow.dag.yaml
new file mode 100644
index 00000000000..52b94d311fd
--- /dev/null
+++ b/examples/flows/evaluation/eval-accuracy-maths-to-code/flow.dag.yaml
@@ -0,0 +1,29 @@
+$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json
+inputs:
+  groundtruth:  # expected answer; line_process.py converts it with float()
+    type: string
+    default: "1"
+  prediction:  # answer to score, or an error-marker string recognised by line_process.py
+    type: string
+    default: "2"
+outputs:
+  score:
+    type: string  # NOTE(review): line_process.py is annotated to return int; confirm "string" is intended
+    reference: ${line_process.output}
+nodes:
+- name: line_process  # per-line score: 1 = match, 0 = mismatch, -1 = error marker or unparseable value
+  type: python
+  source:
+    type: code
+    path: line_process.py
+  inputs:
+    groundtruth: ${inputs.groundtruth}
+    prediction: ${inputs.prediction}
+- name: aggregate  # aggregate.py turns the collected scores into accuracy / error_rate and logs them as metrics
+  type: python
+  source:
+    type: code
+    path: aggregate.py
+  inputs:
+    processed_results: ${line_process.output}
+  aggregation: true  # presumably makes this node receive the scores of all lines at once; confirm against promptflow docs
diff --git a/examples/flows/evaluation/eval-accuracy-maths-to-code/line_process.py b/examples/flows/evaluation/eval-accuracy-maths-to-code/line_process.py
new file mode 100644
index 00000000000..a4d78553c95
--- /dev/null
+++ b/examples/flows/evaluation/eval-accuracy-maths-to-code/line_process.py
@@ -0,0 +1,47 @@
+from promptflow import tool
+
+
+@tool
+def line_process(groundtruth: str, prediction: str) -> int:
+    """Score one prediction against its ground truth.
+
+    :param groundtruth: Expected answer, parseable as a number.
+    :param prediction: Answer to score, or an error marker
+        ("JSONDecodeError" / text starting with "Unknown Error:").
+    :return: 1 if both values agree after rounding to 2 decimals, 0 if they
+        differ, -1 if the prediction is an error marker or either value
+        cannot be parsed as a number.
+    """
+    # Only strings can carry an error marker; the isinstance guard keeps a
+    # non-string prediction from raising AttributeError on startswith().
+    if isinstance(prediction, str) and (
+        prediction == "JSONDecodeError" or prediction.startswith("Unknown Error:")
+    ):
+        return -1
+
+    try:
+        groundtruth = float(groundtruth)
+        prediction = float(prediction)
+    except (TypeError, ValueError):
+        # ValueError: non-numeric text (e.g. an exception message);
+        # TypeError: a missing value such as None.
+        return -1
+
+    return 1 if round(prediction, 2) == round(groundtruth, 2) else 0
+
+
+if __name__ == "__main__":
+    processed_result = line_process("1.0", "1")
+    print("The processed result is", processed_result)
+
+    processed_result = line_process("3.14", "3.1415926")
+    print("The processed result is", processed_result)
+
+    processed_result = line_process("2.1", "2.0")
+    print("The processed result is", processed_result)
+
+    processed_result = line_process("1.0", "JSONDecodeError")
+    print("The processed result is", processed_result)
+
+    processed_result = line_process("1.0", "No module named 'numpy'")
+    print("The processed result is", processed_result)
diff --git a/examples/flows/evaluation/eval-accuracy-maths-to-code/test_data.jsonl b/examples/flows/evaluation/eval-accuracy-maths-to-code/test_data.jsonl
new file mode 100644
index 00000000000..1bbfb38ec79
--- /dev/null
+++ b/examples/flows/evaluation/eval-accuracy-maths-to-code/test_data.jsonl
@@ -0,0 +1,20 @@
+{"question": "What is the sum of 5 and 3?", "groundtruth": "8", "answer": "8"}
+{"question": "Subtract 7 from 10.", "groundtruth": "3", "answer": "3"}
+{"question": "Multiply 6 by 4.", "groundtruth": "24", "answer": "24"}
+{"question": "Divide 20 by 5.", "groundtruth": "4", "answer": "4"}
+{"question": "What is the square of 7?", "groundtruth": "49", "answer": "49"}
+{"question": "What is the square root of 81?", "groundtruth": "9", "answer": "9"}
+{"question": "If a rectangle has a length of 10 and width of 5, what is the area?", "groundtruth": "50", "answer": "50"}
+{"question": "A circle has a radius of 7, what is the area? (Use 3.14 for pi)", "groundtruth": "153.86", "answer": "153.871"}
+{"question": "Solve for x in the equation 2x + 3 = 9.", "groundtruth": "3", "answer": "3"}
+{"question": "What is the value of x if 5x = 25?", "groundtruth": "5", "answer": "5"}
+{"question": "A car travels 200 miles in 4 hours. What is the average speed of the car?", "groundtruth": "50", "answer": "50"}
+{"question": "A car travels at a speed of 60 mph. How long will it take to travel 180 miles?", "groundtruth": "3", "answer": "3"}
+{"question": "If a car travels at a speed of 40 mph for 2 hours, how far will it travel?","groundtruth": "80", "answer": "80"}
+{"question":"A rectangle has length = 10 cm and width = 5 cm. What is its area?", "groundtruth":"50", "answer": "50"}
+{"question":"A circle has radius = 7 cm. What is its circumference? (Use pi =3.14)", "groundtruth":"43.96", "answer": "43.959"}
+{"question":"A triangle has base =10 cm and height =5 cm. What is its area?", "groundtruth":"25", "answer": "25"}
+{"question":"What is the slope of the line that passes through (2,3) and (4,7)?", "groundtruth":"2", "answer": "2"}
+{"question":"The distance between A and B is 2000km, A is moving towards B with speed 80km/hour, meanwhile B is moving towards A with speed 120km/hour, how many hours later A and B can meet?", "groundtruth":"10", "answer": "10"}
+{"question":"The lengths of the two perpendicular sides of a right triangle are 6cm and 8cm. What is the length of the hypotenuse?", "groundtruth": "10", "answer": "10"}
+{"question":"A is running with average speed 10km/hour, A already run half hour. B start to chase A along the same route with average speed 15km/hour, how many hours B will take to meet A?", "groundtruth":"1", "answer": "2"}
\ No newline at end of file
diff --git a/examples/flows/standard/maths-to-code/README.md b/examples/flows/standard/maths-to-code/README.md
new file mode 100644
index 00000000000..61b5ca0e716
--- /dev/null
+++ b/examples/flows/standard/maths-to-code/README.md
@@ -0,0 +1,79 @@
+# Math to Code
+Math to Code is a project that uses a ChatGPT model to generate code that models math questions, then executes the generated code to obtain the final numerical answer.
+
+> [!NOTE]
+>
+> Building a system that generates executable code from user input with LLM is [a complex problem with potential security risks](
+https://developer.nvidia.com/blog/securing-llm-systems-against-prompt-injection/
+). This example is a demonstration rather than something you can use directly in production. To build such a system correctly, you should address key security considerations such as input validation and additional sanitization of the generated code, or, better, run the generated code in a sandbox environment.
+
+Tools used in this flow:
+
+- `python` tool
+- built-in `llm` tool
+
+Connections used in this flow:
+
+- `open_ai` connection
+
+## Prerequisites
+Install promptflow sdk and other dependencies:
+
+```cmd
+pip install -r requirements.txt
+```
+
+## Setup connection
+Prepare your Azure OpenAI resource by following these [instructions](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal) and get your `api_key` if you don't have one.
+
+Note in this example, we are using [chat api](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/chatgpt?pivots=programming-language-chat-completions), please use `gpt-35-turbo` or `gpt-4` model deployment.
+
+Create the connection if you haven't done so already. Ensure you have put your Azure OpenAI endpoint and key in the [azure_openai.yml](../../../connections/azure_openai.yml) file, or override them with `--set` as shown below.
+```bash
+# Override keys with --set to avoid yaml file changes
+pf connection create -f ../../../connections/azure_openai.yml --set api_key=<your_api_key> api_base=<your_api_base>
+```
+
+Ensure you have created the `open_ai_connection` connection.
+```bash
+pf connection show -n open_ai_connection
+```
+
+
+## Run flow in local
+
+### Run locally with single line input
+
+```bash
+# test with default input value in flow.dag.yaml
+pf flow test --flow .
+# test with specific input
+pf flow test --flow . --inputs math_question='If a rectangle has a length of 10 and width of 5, what is the area?'
+```
+
+### Run with multiple lines data
+
+- create run
+```bash
+# create a random run name
+run_name="math_to_code_"$(openssl rand -hex 12)
+pf run create --flow . --data ./math_data.jsonl --column-mapping math_question='${data.question}' --name $run_name --stream
+```
+
+### Get the accuracy using evaluation flow
+Use [eval-accuracy-maths-to-code](../../evaluation/eval-accuracy-maths-to-code/) to evaluate accuracy and error rate metrics against the math-to-code flow.
+
+- accuracy: if the generated code executes correctly and produces a final numerical answer, that answer is compared with the groundtruth in the test data. For a single instance, the result is True if the final number equals the groundtruth and False otherwise. Accuracy measures the percentage of correct answers across the test data.
+- error_rate: in some cases the flow cannot produce a numerical answer, for example when the generated code cannot be executed because of a code parsing error or because a dependent package is not available in the conda env. Error rate measures the percentage of such cases in the test data.
+
+```bash
+# create a random eval run name
+eval_run_name="math_to_code_eval_run_"$(openssl rand -hex 12)
+
+# invoke accuracy and error rate evaluation against math-to-code batch run
+pf run create --flow ../../evaluation/eval-accuracy-maths-to-code/ --data ./math_data.jsonl --column-mapping groundtruth='${data.answer}' prediction='${run.outputs.answer}' --run $run_name --name $eval_run_name --stream
+
+# view the run details
+pf run show-details -n $eval_run_name
+pf run show-metrics -n $eval_run_name
+```
diff --git a/examples/flows/standard/maths-to-code/ask_llm.jinja2 b/examples/flows/standard/maths-to-code/ask_llm.jinja2
new file mode 100644
index 00000000000..8fea4ce5875
--- /dev/null
+++ b/examples/flows/standard/maths-to-code/ask_llm.jinja2
@@ -0,0 +1,23 @@
+system:
+I want you to act as a Math expert specializing in Algebra, Geometry, and Calculus. Given the question, develop python code to model the user's question.
+The python code will print the result at the end.
+Please generate executable python code, your reply will be in JSON format, something like:
+{
+    "code": "print(1+1)"
+}
+
+user:
+This is a set of examples, each including a question and the code that answers it:
+{% for ex in examples %}
+QUESTION: {{ ex.question }}
+CODE:
+{{ ex.code }}
+
+{% endfor %}
+
+Now come to the real task, make sure return a valid json. The json should contain a key named "code" and the value is the python code. For example:
+{
+    "code": "print(1+1)"
+}
+QUESTION: {{ question }}
+CODE:
diff --git a/examples/flows/standard/maths-to-code/code_execution.py b/examples/flows/standard/maths-to-code/code_execution.py
new file mode 100644
index 00000000000..2c082e28a5f
--- /dev/null
+++ b/examples/flows/standard/maths-to-code/code_execution.py
@@ -0,0 +1,46 @@
+from promptflow import tool
+
+import sys
+from io import StringIO
+
+
+@tool
+def func_exe(code_snippet: str):
+    """Execute a Python snippet and return what it printed.
+
+    :param code_snippet: Python source to run, or an upstream error marker
+        ("JSONDecodeError" / text starting with "Unknown Error:"), which is
+        returned unchanged without executing anything.
+    :return: The snippet's captured stdout with surrounding whitespace
+        stripped, or ``str(exception)`` if the snippet raised an Exception.
+    """
+    if code_snippet == "JSONDecodeError" or code_snippet.startswith("Unknown Error:"):
+        return code_snippet
+
+    # NOTE(security): exec() runs the snippet with this process's privileges
+    # and no sandboxing; only feed it code you are prepared to run.
+    # One fresh dict is used as both globals and locals so that functions and
+    # comprehensions defined by the snippet can see its top-level names.
+    snippet_namespace = {"__name__": "__main__"}
+
+    original_stdout = sys.stdout
+    sys.stdout = captured_output = StringIO()
+    try:
+        exec(code_snippet.lstrip(), snippet_namespace)
+    except Exception as e:
+        return str(e)
+    finally:
+        # Restore stdout on every exit path, including non-Exception exits
+        # such as SystemExit raised by the snippet.
+        sys.stdout = original_stdout
+
+    return captured_output.getvalue().strip()
+
+
+if __name__ == "__main__":
+    print(func_exe("print(5+3)"))
+    print(func_exe("count = 0\nfor i in range(100):\n    if i % 8 == 0:\n        count += 1\nprint(count)"))
+    print(func_exe("sum = 0\ni = 0\nwhile 3**i < 100:\n    sum += 3**i\n    i += 1\nprint(sum)"))
+    print(func_exe("speed_A = 80\nspeed_B = 120\ndistance = 2000\ntime = distance / (speed_A + speed_B)\nprint(time)"))
+    print(func_exe("Unknown Error"))
+    print(func_exe("JSONDecodeError"))
diff --git a/examples/flows/standard/maths-to-code/code_refine.py b/examples/flows/standard/maths-to-code/code_refine.py
new file mode 100644
index 00000000000..0e671c4fc58
--- /dev/null
+++ b/examples/flows/standard/maths-to-code/code_refine.py
@@ -0,0 +1,83 @@
+from promptflow import tool
+import ast
+import json
+
+
+def infinite_loop_check(code_snippet):
+    """Return True if the snippet has a ``while`` loop without an ``else`` clause.
+
+    NOTE(review): despite the name, this is only a structural check; it does
+    not determine whether a loop actually terminates.
+
+    :raises SyntaxError: if ``code_snippet`` is not valid Python source.
+    """
+    tree = ast.parse(code_snippet)
+    return any(
+        isinstance(node, ast.While) and not node.orelse
+        for node in ast.walk(tree)
+    )
+
+
+def syntax_error_check(code_snippet):
+    """Return True if ``code_snippet`` cannot be parsed as Python source."""
+    try:
+        ast.parse(code_snippet)
+    except SyntaxError:
+        return True
+    return False
+
+
+def error_fix(code_snippet):
+    """Return the snippet with ``else: pass`` added to every else-less ``while``.
+
+    The result is regenerated from the AST with ``ast.unparse``, so comments
+    and the original formatting are not preserved.
+
+    :raises SyntaxError: if ``code_snippet`` is not valid Python source.
+    """
+    tree = ast.parse(code_snippet)
+    for node in ast.walk(tree):
+        if isinstance(node, ast.While) and not node.orelse:
+            node.orelse = [ast.Pass()]
+    return ast.unparse(tree)
+
+
+@tool
+def code_refine(original_code: str) -> str:
+    """Extract the generated code from the LLM's JSON reply and post-process it.
+
+    :param original_code: JSON text expected to hold the code under a "code" key.
+    :return: The code (rewritten by ``error_fix`` when it contains an else-less
+        ``while`` loop), "JSONDecodeError" if the reply is not valid JSON, or
+        "Unknown Error:" followed by the exception text for any other failure,
+        including code that does not parse.
+    """
+    try:
+        code = json.loads(original_code)["code"]
+
+        # infinite_loop_check parses the code, so a syntax error surfaces here
+        # as SyntaxError and is reported by the generic handler below. A
+        # separate syntax re-check afterwards could never fire: the code has
+        # either already parsed or was regenerated by ast.unparse.
+        if infinite_loop_check(code):
+            return error_fix(code)
+        return code
+    except json.JSONDecodeError:
+        return "JSONDecodeError"
+    except Exception as e:
+        return "Unknown Error:" + str(e)
+
+
+if __name__ == "__main__":
+    # "10 * 0.5 = 15 * t" is not valid Python, so this demonstrates the
+    # "Unknown Error:" path.
+    snippet = (
+        "distance_A = 10 * 0.5\n"
+        "distance_B = 15 * t\n\n"
+        "equation: distance_A = distance_B\n\n"
+        "10 * 0.5 = 15 * t\n\n"
+        "t = (10 * 0.5) / 15\n\n"
+        "print(t)"
+    )
+    refined_code = code_refine(json.dumps({"code": snippet}, indent=4))
+    print(refined_code)
diff --git a/examples/flows/standard/maths-to-code/flow.dag.yaml b/examples/flows/standard/maths-to-code/flow.dag.yaml
new file mode 100644
index 00000000000..d3c67f757de
--- /dev/null
+++ b/examples/flows/standard/maths-to-code/flow.dag.yaml
@@ -0,0 +1,46 @@
+$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json
+environment:
+  python_requirements_txt: requirements.txt
+inputs:
+  math_question:
+    type: string
+    default: If a rectangle has a length of 10 and width of 5, what is the area?
+outputs:
+  code:  # output of code_refine: the Python code, or an error-marker string
+    type: string
+    reference: ${code_refine.output}
+  answer:  # output of final_code_execution: captured stdout, exception text, or a passed-through error marker
+    type: string
+    reference: ${final_code_execution.output}
+nodes:  # listed out of execution order; data flows math_example -> code_gen -> code_refine -> final_code_execution
+- name: final_code_execution  # runs the refined code via code_execution.py
+  type: python
+  source:
+    type: code
+    path: code_execution.py
+  inputs:
+    code_snippet: ${code_refine.output}
+- name: math_example  # supplies the few-shot examples; takes no inputs
+  type: python
+  source:
+    type: code
+    path: math_example.py
+  inputs: {}
+- name: code_refine  # extracts "code" from the LLM's JSON reply and post-processes it
+  type: python
+  source:
+    type: code
+    path: code_refine.py
+  inputs:
+    original_code: ${code_gen.output}
+- name: code_gen  # renders ask_llm.jinja2 with the question and examples and sends it to the LLM
+  type: llm
+  source:
+    type: code
+    path: ask_llm.jinja2
+  inputs:
+    deployment_name: gpt-35-turbo
+    question: ${inputs.math_question}
+    examples: ${math_example.output}
+  connection: open_ai_connection
+  api: chat
diff --git a/examples/flows/standard/maths-to-code/math_data.jsonl b/examples/flows/standard/maths-to-code/math_data.jsonl
new file mode 100644
index 00000000000..6cbd6d7e6d9
--- /dev/null
+++ b/examples/flows/standard/maths-to-code/math_data.jsonl
@@ -0,0 +1,20 @@
+{"question": "What is the sum of 5 and 3?", "answer": "8"}
+{"question": "Subtract 7 from 10.", "answer": "3"}
+{"question": "Multiply 6 by 4.", "answer": "24"}
+{"question": "Divide 20 by 5.", "answer": "4"}
+{"question": "What is the square of 7?", "answer": "49"}
+{"question": "What is the square root of 81?", "answer": "9"}
+{"question": "If a rectangle has a length of 10 and width of 5, what is the area?", "answer": "50"}
+{"question": "A circle has a radius of 7, what is the area? (Use 3.14 for pi)", "answer": "153.86"}
+{"question": "Solve for x in the equation 2x + 3 = 9.", "answer": "3"}
+{"question": "What is the value of x if 5x = 25?", "answer": "5"}
+{"question": "A car travels 200 miles in 4 hours. What is the average speed of the car?", "answer": "50"}
+{"question": "A car travels at a speed of 60 mph. How long will it take to travel 180 miles?", "answer": "3"}
+{"question": "If a car travels at a speed of 40 mph for 2 hours, how far will it travel?","answer": "80"}
+{"question":"A rectangle has length = 10 cm and width = 5 cm. What is its area?", "answer":"50"}
+{"question":"A circle has radius = 7 cm. What is its circumference? (Use pi =3.14)", "answer":"43.96"}
+{"question":"A triangle has base =10 cm and height =5 cm. What is its area?", "answer":"25"}
+{"question":"What is the slope of the line that passes through (2,3) and (4,7)?", "answer":"2"}
+{"question":"The distance between A and B is 2000km, A is moving towards B with speed 80km/hour, meanwhile B is moving towards A with speed 120km/hour, how many hours later A and B can meet?", "answer":"10"}
+{"question":"The lengths of the two perpendicular sides of a right triangle are 6cm and 8cm. What is the length of the hypotenuse?", "answer": "10"}
+{"question":"A is running with average speed 10km/hour, A already run half hour. B start to chase A along the same route with average speed 15km/hour, how many hours B will take to meet A?", "answer":"1"}
\ No newline at end of file
diff --git a/examples/flows/standard/maths-to-code/math_example.py b/examples/flows/standard/maths-to-code/math_example.py
new file mode 100644
index 00000000000..74ce04fb1ff
--- /dev/null
+++ b/examples/flows/standard/maths-to-code/math_example.py
@@ -0,0 +1,78 @@
+import json
+
+from promptflow import tool
+
+
+def _as_llm_reply(code: str) -> str:
+    """Render ``code`` in the JSON reply format the prompt asks the model for."""
+    # json.dumps escapes newlines and quotes, so every example is valid JSON.
+    return json.dumps({"code": code}, indent=4)
+
+
+@tool
+def prepare_example():
+    """Return the few-shot examples used to prompt the code-generation LLM.
+
+    :return: List of dicts with "question", "code" (a JSON object holding the
+        Python code under a "code" key) and "answer" (the expected answer).
+    """
+    return [
+        {
+            "question": "What is 37593 * 67?",
+            "code": _as_llm_reply("print(37593 * 67)"),
+            "answer": "2518731",
+        },
+        {
+            "question": "What is the value of x in the equation 2x + 3 = 11?",
+            "code": _as_llm_reply("print((11-3)/2)"),
+            "answer": "4",
+        },
+        {
+            "question": "How many of the integers between 0 and 99 inclusive are divisible by 8?",
+            "code": _as_llm_reply(
+                "count = 0\n"
+                "for i in range(100):\n"
+                "    if i % 8 == 0:\n"
+                "        count += 1\n"
+                "print(count)"
+            ),
+            "answer": "13",
+        },
+        {
+            "question": (
+                "Janet's ducks lay 16 eggs per day. "
+                "She eats three for breakfast every morning and bakes muffins for her friends every day with four. "
+                "She sells the remainder at the farmers' market daily for $2 per fresh duck egg. "
+                "How much in dollars does she make every day at the farmers' market?"
+            ),
+            "code": _as_llm_reply("print((16-3-4)*2)"),
+            "answer": "18",
+        },
+        {
+            "question": "What is the sum of the powers of 3 (3^i) that are smaller than 100?",
+            "code": _as_llm_reply(
+                "sum = 0\n"
+                "i = 0\n"
+                "while 3**i < 100:\n"
+                "    sum += 3**i\n"
+                "    i += 1\n"
+                "print(sum)"
+            ),
+            "answer": "121",
+        },
+        {
+            "question": (
+                "Carla is downloading a 200 GB file. She can download 2 GB/minute, "
+                "but 40% of the way through the download, the download fails. "
+                "Then Carla has to restart the download from the beginning. "
+                "How long did it take her to download the file in minutes?"
+            ),
+            "code": _as_llm_reply("print(200/2*1.4)"),
+            "answer": "140",
+        },
+        {
+            "question": "What is the sum of the 10 first positive integers?",
+            "code": _as_llm_reply("print(sum(range(1,11)))"),
+            "answer": "55",
+        },
+    ]
diff --git a/examples/flows/standard/maths-to-code/math_test.ipynb b/examples/flows/standard/maths-to-code/math_test.ipynb
new file mode 100644
index 00000000000..0e8c27ca6bc
--- /dev/null
+++ b/examples/flows/standard/maths-to-code/math_test.ipynb
@@ -0,0 +1,893 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# setup pf client and execution path\n",
+    "\n",
+    "from promptflow import PFClient\n",
+    "import json\n",
+    "import os\n",
+    "\n",
+    "pf = PFClient()\n",
+    "\n",
+    "root = os.path.join(os.getcwd(), \"../\")\n",
+    "flow = os.path.join(root, \"maths-to-code\")\n",
+    "data = os.path.join(flow, \"math_data.jsonl\")\n",
+    "eval_flow = os.path.join(root, \"../evaluation/eval-accuracy-maths-to-code\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# start batch run of maths-to-code\n",
+    "base_run = pf.run(\n",
+    "    flow = flow, \n",
+    "    data = data, \n",
+    "    column_mapping={\"math_question\": \"${data.question}\"},\n",
+    "    display_name=\"maths_to_code_batch_run\",\n",
+    "    stream=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>inputs.math_question</th>\n",
+       "      <th>inputs.line_number</th>\n",
+       "      <th>outputs.answer</th>\n",
+       "      <th>outputs.code</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>What is the sum of 5 and 3?</td>\n",
+       "      <td>0</td>\n",
+       "      <td>8.000000</td>\n",
+       "      <td>print(5 + 3)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Subtract 7 from 10.</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>print(10 - 7)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Multiply 6 by 4.</td>\n",
+       "      <td>2</td>\n",
+       "      <td>24.000000</td>\n",
+       "      <td>print(6 * 4)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Divide 20 by 5.</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4.000000</td>\n",
+       "      <td>print(20 / 5)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>What is the square of 7?</td>\n",
+       "      <td>4</td>\n",
+       "      <td>49.000000</td>\n",
+       "      <td>print(7**2)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>What is the square root of 81?</td>\n",
+       "      <td>5</td>\n",
+       "      <td>9.000000</td>\n",
+       "      <td>import math\\nprint(math.sqrt(81))</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>If a rectangle has a length of 10 and width of...</td>\n",
+       "      <td>6</td>\n",
+       "      <td>50.000000</td>\n",
+       "      <td>length = 10\\nwidth = 5\\narea = length * width\\...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>A circle has a radius of 7, what is the area? ...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>153.860000</td>\n",
+       "      <td>area = 3.14 * (7**2)\\nprint(area)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>Solve for x in the equation 2x + 3 = 9.</td>\n",
+       "      <td>8</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>print((9-3)/2)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>What is the value of x if 5x = 25?</td>\n",
+       "      <td>9</td>\n",
+       "      <td>5.000000</td>\n",
+       "      <td>print(25/5)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>A car travels 200 miles in 4 hours. What is th...</td>\n",
+       "      <td>10</td>\n",
+       "      <td>50.000000</td>\n",
+       "      <td>print(200/4)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>A car travels at a speed of 60 mph. How long w...</td>\n",
+       "      <td>11</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>print(180 / 60)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>If a car travels at a speed of 40 mph for 2 ho...</td>\n",
+       "      <td>12</td>\n",
+       "      <td>80.000000</td>\n",
+       "      <td>print(40 * 2)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>A rectangle has length = 10 cm and width = 5 c...</td>\n",
+       "      <td>13</td>\n",
+       "      <td>50.000000</td>\n",
+       "      <td>print(10 * 5)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>A circle has radius = 7 cm. What is its circum...</td>\n",
+       "      <td>14</td>\n",
+       "      <td>43.960000</td>\n",
+       "      <td>radius = 7\\ncircumference = 2 * 3.14 * radius\\...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>A triangle has base =10 cm and height =5 cm. W...</td>\n",
+       "      <td>15</td>\n",
+       "      <td>25.000000</td>\n",
+       "      <td>print((10*5)/2)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>What is the slope of the line that passes thro...</td>\n",
+       "      <td>16</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>x1, y1 = 2, 3\\nx2, y2 = 4, 7\\nslope = (y2 - y1...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>The distance between A and B is 2000km, A is m...</td>\n",
+       "      <td>17</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>print(2000/(80+120))</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>The lengths of the two perpendicular sides of ...</td>\n",
+       "      <td>18</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>import math\\n\\na = 6\\nb = 8\\nc = math.sqrt(a**...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>A is running with average speed 10km/hour, A a...</td>\n",
+       "      <td>19</td>\n",
+       "      <td>0.333333</td>\n",
+       "      <td>A_distance = 10 * 0.5\\nB_speed = 15\\nB_time = ...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                 inputs.math_question  inputs.line_number  \\\n",
+       "0                         What is the sum of 5 and 3?                   0   \n",
+       "1                                 Subtract 7 from 10.                   1   \n",
+       "2                                    Multiply 6 by 4.                   2   \n",
+       "3                                     Divide 20 by 5.                   3   \n",
+       "4                            What is the square of 7?                   4   \n",
+       "5                      What is the square root of 81?                   5   \n",
+       "6   If a rectangle has a length of 10 and width of...                   6   \n",
+       "7   A circle has a radius of 7, what is the area? ...                   7   \n",
+       "8             Solve for x in the equation 2x + 3 = 9.                   8   \n",
+       "9                  What is the value of x if 5x = 25?                   9   \n",
+       "10  A car travels 200 miles in 4 hours. What is th...                  10   \n",
+       "11  A car travels at a speed of 60 mph. How long w...                  11   \n",
+       "12  If a car travels at a speed of 40 mph for 2 ho...                  12   \n",
+       "13  A rectangle has length = 10 cm and width = 5 c...                  13   \n",
+       "14  A circle has radius = 7 cm. What is its circum...                  14   \n",
+       "15  A triangle has base =10 cm and height =5 cm. W...                  15   \n",
+       "16  What is the slope of the line that passes thro...                  16   \n",
+       "17  The distance between A and B is 2000km, A is m...                  17   \n",
+       "18  The lengths of the two perpendicular sides of ...                  18   \n",
+       "19  A is running with average speed 10km/hour, A a...                  19   \n",
+       "\n",
+       "    outputs.answer                                       outputs.code  \n",
+       "0         8.000000                                       print(5 + 3)  \n",
+       "1         3.000000                                      print(10 - 7)  \n",
+       "2        24.000000                                       print(6 * 4)  \n",
+       "3         4.000000                                      print(20 / 5)  \n",
+       "4        49.000000                                        print(7**2)  \n",
+       "5         9.000000                  import math\\nprint(math.sqrt(81))  \n",
+       "6        50.000000  length = 10\\nwidth = 5\\narea = length * width\\...  \n",
+       "7       153.860000                  area = 3.14 * (7**2)\\nprint(area)  \n",
+       "8         3.000000                                     print((9-3)/2)  \n",
+       "9         5.000000                                        print(25/5)  \n",
+       "10       50.000000                                       print(200/4)  \n",
+       "11        3.000000                                    print(180 / 60)  \n",
+       "12       80.000000                                      print(40 * 2)  \n",
+       "13       50.000000                                      print(10 * 5)  \n",
+       "14       43.960000  radius = 7\\ncircumference = 2 * 3.14 * radius\\...  \n",
+       "15       25.000000                                    print((10*5)/2)  \n",
+       "16        2.000000  x1, y1 = 2, 3\\nx2, y2 = 4, 7\\nslope = (y2 - y1...  \n",
+       "17       10.000000                               print(2000/(80+120))  \n",
+       "18       10.000000  import math\\n\\na = 6\\nb = 8\\nc = math.sqrt(a**...  \n",
+       "19        0.333333  A_distance = 10 * 0.5\\nB_speed = 15\\nB_time = ...  "
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Show output of flow run\n",
+    "pf.get_details(base_run)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# evaluate against the batch run and groundtruth data\n",
+    "eval_run = pf.run(\n",
+    "    flow = eval_flow, \n",
+    "    data = data, \n",
+    "    run = base_run,\n",
+    "    column_mapping={\"groundtruth\": \"${data.answer}\", \"prediction\": \"${run.outputs.answer}\"},\n",
+    "    display_name=\"maths_to_code_eval_run\",\n",
+    "    stream=True\n",
+    ")\n",
+    "\n",
+    "pf.get_details(eval_run)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'accuracy': 0.95, 'error_rate': 0.0}"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Get metrics of the evaluation flow run\n",
+    "pf.get_metrics(eval_run)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Visualize the flow run and evaluation run with HTML\n",
+    "pf.visualize([base_run, eval_run])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Run on Azure\n",
+    "If you want to run and evaluate your flow on Azure, you can use the following example to set up your Azure ML workspace."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n",
+    "\n",
+    "# init credential\n",
+    "try:\n",
+    "    credential = DefaultAzureCredential()\n",
+    "    # Check if given credential can get token successfully.\n",
+    "    credential.get_token(\"https://management.azure.com/.default\")\n",
+    "except Exception as ex:\n",
+    "    # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential does not work\n",
+    "    credential = InteractiveBrowserCredential()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from promptflow.azure import PFClient\n",
+    "\n",
+    "try:\n",
+    "    pf = PFClient.from_config(credential=credential)\n",
+    "except Exception as ex:\n",
+    "    # NOTE: Update the following workspace information if it has not been configured correctly before\n",
+    "    client_config = {\n",
+    "        \"subscription_id\": \"<SUBSCRIPTION_ID>\",\n",
+    "        \"resource_group\": \"<RESOURCE_GROUP>\",\n",
+    "        \"workspace_name\": \"<AML_WORKSPACE_NAME>\",\n",
+    "    }\n",
+    "\n",
+    "    if client_config[\"subscription_id\"].startswith(\"<\"):\n",
+    "        print(\n",
+    "            \"please update your <SUBSCRIPTION_ID> <RESOURCE_GROUP> <AML_WORKSPACE_NAME> in notebook cell\"\n",
+    "        )\n",
+    "        raise ex\n",
+    "    else:  # write and reload from config file\n",
+    "        import json, os\n",
+    "\n",
+    "        config_path = \"../.azureml/config.json\"\n",
+    "        os.makedirs(os.path.dirname(config_path), exist_ok=True)\n",
+    "        with open(config_path, \"w\") as fo:\n",
+    "            fo.write(json.dumps(client_config))\n",
+    "        pf = PFClient.from_config(credential=credential, path=config_path)\n",
+    "\n",
+    "print(pf)\n",
+    "\n",
+    "# NOTE: replace with your own runtime name in your Azure Machine Learning workspace\n",
+    "runtime = '<runtime_name>'\n",
+    "\n",
+    "# NOTE: replace <my_azure_open_ai_connection> and <gpt-35-turbo> with your own connection and deployment name in your Azure Machine Learning workspace\n",
+    "connection_mapping = {\"code_gen\": {\"connection\": \"<my_azure_open_ai_connection>\", \"deployment_name\": \"<gpt-35-turbo>\"}}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# batch run of maths to code\n",
+    "\n",
+    "base_run = pf.run(\n",
+    "    flow = flow,\n",
+    "    data = data,\n",
+    "    column_mapping = {\"math_question\": \"${data.question}\"},\n",
+    "    connections = connection_mapping,\n",
+    "    runtime = runtime,\n",
+    "    stream = True,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>inputs.math_question</th>\n",
+       "      <th>outputs.code</th>\n",
+       "      <th>outputs.answer</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Subtract 7 from 10.</td>\n",
+       "      <td>print(10 - 7)</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>What is the sum of 5 and 3?</td>\n",
+       "      <td>print(5+3)</td>\n",
+       "      <td>8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>What is the square of 7?</td>\n",
+       "      <td>print(7**2)</td>\n",
+       "      <td>49</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>A car travels 200 miles in 4 hours. What is th...</td>\n",
+       "      <td>print(200 / 4)</td>\n",
+       "      <td>50.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Multiply 6 by 4.</td>\n",
+       "      <td>print(6 * 4)</td>\n",
+       "      <td>24</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>What is the value of x if 5x = 25?</td>\n",
+       "      <td>print(25/5)</td>\n",
+       "      <td>5.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>If a car travels at a speed of 40 mph for 2 ho...</td>\n",
+       "      <td>print(40 * 2)</td>\n",
+       "      <td>80</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>A car travels at a speed of 60 mph. How long w...</td>\n",
+       "      <td>print(180 / 60)</td>\n",
+       "      <td>3.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>A triangle has base =10 cm and height =5 cm. W...</td>\n",
+       "      <td>print((10*5)/2)</td>\n",
+       "      <td>25.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>Divide 20 by 5.</td>\n",
+       "      <td>print(20 / 5)</td>\n",
+       "      <td>4.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>If a rectangle has a length of 10 and width of...</td>\n",
+       "      <td>print(10 * 5)</td>\n",
+       "      <td>50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>What is the square root of 81?</td>\n",
+       "      <td>import math\\nprint(math.sqrt(81))</td>\n",
+       "      <td>9.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>Solve for x in the equation 2x + 3 = 9.</td>\n",
+       "      <td>print((9-3)/2)</td>\n",
+       "      <td>3.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>A rectangle has length = 10 cm and width = 5 c...</td>\n",
+       "      <td>length = 10\\ncm_width = 5\\narea = length * wid...</td>\n",
+       "      <td>name 'width' is not defined</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>A circle has a radius of 7, what is the area? ...</td>\n",
+       "      <td>pi = 3.14\\nradius = 7\\narea = pi * radius ** 2...</td>\n",
+       "      <td>153.86</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>What is the slope of the line that passes thro...</td>\n",
+       "      <td>slope = (7-3)/(4-2)\\nprint(slope)</td>\n",
+       "      <td>2.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>The distance between A and B is 2000km, A is m...</td>\n",
+       "      <td>distance = 2000\\nspeed_A = 80\\nspeed_B = 120\\n...</td>\n",
+       "      <td>10.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>A circle has radius = 7 cm. What is its circum...</td>\n",
+       "      <td>pi = 3.14\\nradius = 7\\ncircumference = 2 * pi ...</td>\n",
+       "      <td>43.96</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>The lengths of the two perpendicular sides of ...</td>\n",
+       "      <td>import math\\n\\na = 6\\nb = 8\\nc = math.sqrt(a**...</td>\n",
+       "      <td>10.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>A is running with average speed 10km/hour, A a...</td>\n",
+       "      <td>distance_A = 10 * (0.5)\\n\\nspeed_B = 15\\n\\ntim...</td>\n",
+       "      <td></td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                 inputs.math_question  \\\n",
+       "0                                 Subtract 7 from 10.   \n",
+       "1                         What is the sum of 5 and 3?   \n",
+       "2                            What is the square of 7?   \n",
+       "3   A car travels 200 miles in 4 hours. What is th...   \n",
+       "4                                    Multiply 6 by 4.   \n",
+       "5                  What is the value of x if 5x = 25?   \n",
+       "6   If a car travels at a speed of 40 mph for 2 ho...   \n",
+       "7   A car travels at a speed of 60 mph. How long w...   \n",
+       "8   A triangle has base =10 cm and height =5 cm. W...   \n",
+       "9                                     Divide 20 by 5.   \n",
+       "10  If a rectangle has a length of 10 and width of...   \n",
+       "11                     What is the square root of 81?   \n",
+       "12            Solve for x in the equation 2x + 3 = 9.   \n",
+       "13  A rectangle has length = 10 cm and width = 5 c...   \n",
+       "14  A circle has a radius of 7, what is the area? ...   \n",
+       "15  What is the slope of the line that passes thro...   \n",
+       "16  The distance between A and B is 2000km, A is m...   \n",
+       "17  A circle has radius = 7 cm. What is its circum...   \n",
+       "18  The lengths of the two perpendicular sides of ...   \n",
+       "19  A is running with average speed 10km/hour, A a...   \n",
+       "\n",
+       "                                         outputs.code  \\\n",
+       "0                                       print(10 - 7)   \n",
+       "1                                          print(5+3)   \n",
+       "2                                         print(7**2)   \n",
+       "3                                      print(200 / 4)   \n",
+       "4                                        print(6 * 4)   \n",
+       "5                                         print(25/5)   \n",
+       "6                                       print(40 * 2)   \n",
+       "7                                     print(180 / 60)   \n",
+       "8                                     print((10*5)/2)   \n",
+       "9                                       print(20 / 5)   \n",
+       "10                                      print(10 * 5)   \n",
+       "11                  import math\\nprint(math.sqrt(81))   \n",
+       "12                                     print((9-3)/2)   \n",
+       "13  length = 10\\ncm_width = 5\\narea = length * wid...   \n",
+       "14  pi = 3.14\\nradius = 7\\narea = pi * radius ** 2...   \n",
+       "15                  slope = (7-3)/(4-2)\\nprint(slope)   \n",
+       "16  distance = 2000\\nspeed_A = 80\\nspeed_B = 120\\n...   \n",
+       "17  pi = 3.14\\nradius = 7\\ncircumference = 2 * pi ...   \n",
+       "18  import math\\n\\na = 6\\nb = 8\\nc = math.sqrt(a**...   \n",
+       "19  distance_A = 10 * (0.5)\\n\\nspeed_B = 15\\n\\ntim...   \n",
+       "\n",
+       "                 outputs.answer  \n",
+       "0                             3  \n",
+       "1                             8  \n",
+       "2                            49  \n",
+       "3                          50.0  \n",
+       "4                            24  \n",
+       "5                           5.0  \n",
+       "6                            80  \n",
+       "7                           3.0  \n",
+       "8                          25.0  \n",
+       "9                           4.0  \n",
+       "10                           50  \n",
+       "11                          9.0  \n",
+       "12                          3.0  \n",
+       "13  name 'width' is not defined  \n",
+       "14                       153.86  \n",
+       "15                          2.0  \n",
+       "16                         10.0  \n",
+       "17                        43.96  \n",
+       "18                         10.0  \n",
+       "19                               "
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# get output of flow run\n",
+    "pf.get_details(base_run)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# evaluation run against base run\n",
+    "\n",
+    "eval_run = pf.run(\n",
+    "    flow = eval_flow, \n",
+    "    data = data, \n",
+    "    run = base_run,\n",
+    "    column_mapping={\"groundtruth\": \"${data.answer}\", \"prediction\": \"${run.outputs.answer}\"},\n",
+    "    runtime = runtime,\n",
+    "    stream = True,\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>inputs.groundtruth</th>\n",
+       "      <th>inputs.prediction</th>\n",
+       "      <th>outputs.score</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>4.00</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>3.00</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>24.00</td>\n",
+       "      <td>24</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>49.00</td>\n",
+       "      <td>49</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5.00</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>50.00</td>\n",
+       "      <td>50</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>8.00</td>\n",
+       "      <td>8</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>3.00</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>43.96</td>\n",
+       "      <td>43.96</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>3.00</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>153.86</td>\n",
+       "      <td>153.86</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>9.00</td>\n",
+       "      <td>9.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>80.00</td>\n",
+       "      <td>80</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>25.00</td>\n",
+       "      <td>25.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>50.00</td>\n",
+       "      <td>50.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>2.00</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>50.00</td>\n",
+       "      <td>name 'width' is not defined</td>\n",
+       "      <td>-1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>10.00</td>\n",
+       "      <td>10.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>10.00</td>\n",
+       "      <td>10.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>1.00</td>\n",
+       "      <td></td>\n",
+       "      <td>-1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    inputs.groundtruth            inputs.prediction  outputs.score\n",
+       "0                 4.00                          4.0              1\n",
+       "1                 3.00                            3              1\n",
+       "2                24.00                           24              1\n",
+       "3                49.00                           49              1\n",
+       "4                 5.00                          5.0              1\n",
+       "5                50.00                           50              1\n",
+       "6                 8.00                            8              1\n",
+       "7                 3.00                          3.0              1\n",
+       "8                43.96                        43.96              1\n",
+       "9                 3.00                          3.0              1\n",
+       "10              153.86                       153.86              1\n",
+       "11                9.00                          9.0              1\n",
+       "12               80.00                           80              1\n",
+       "13               25.00                         25.0              1\n",
+       "14               50.00                         50.0              1\n",
+       "15                2.00                          2.0              1\n",
+       "16               50.00  name 'width' is not defined             -1\n",
+       "17               10.00                         10.0              1\n",
+       "18               10.00                         10.0              1\n",
+       "19                1.00                                          -1"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# get output of evaluation run\n",
+    "pf.get_details(eval_run)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{\n",
+      "    \"accuracy\": 0.9,\n",
+      "    \"error_rate\": 0.1\n",
+      "}\n"
+     ]
+    }
+   ],
+   "source": [
+    "metrics = pf.get_metrics(eval_run)\n",
+    "print(json.dumps(metrics, indent=4))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "pf",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/flows/standard/maths-to-code/prompt_gen.jinja2 b/examples/flows/standard/maths-to-code/prompt_gen.jinja2
new file mode 100644
index 00000000000..b676bc0818b
--- /dev/null
+++ b/examples/flows/standard/maths-to-code/prompt_gen.jinja2
@@ -0,0 +1,25 @@
+system:
+I want you to act as a Math expert specializing in Algebra, Geometry, and Calculus. Given the question, develop python code to model the user's question.
+The python code will print the result at the end.
+Please generate executable python code, your reply will be in JSON format, something like:
+{
+    "code": "print(1+1)"
+}
+
+user:
+This is a set of examples, each including a question and the code that answers it:
+{% for ex in examples %}
+QUESTION: {{ ex.question }}
+CODE:
+{{ ex.code }}
+
+{% endfor %}
+
+Now for the real task. Make sure to return valid JSON. The JSON should contain a key named "code" whose value is the Python code. For example:
+{
+    "code": "print(1+1)"
+}
+QUESTION: {{ question }}
+CODE:
+
+
diff --git a/examples/flows/standard/maths-to-code/requirements.txt b/examples/flows/standard/maths-to-code/requirements.txt
new file mode 100644
index 00000000000..95b29c850b5
--- /dev/null
+++ b/examples/flows/standard/maths-to-code/requirements.txt
@@ -0,0 +1,4 @@
+langchain
+sympy
+promptflow[azure]
+promptflow-tools
\ No newline at end of file