
Commit c794101

Authored Aug 8, 2024
Merge pull request #385 from Cloud-Code-AI/384-e2e-gen-add-retry-attempts-for-failed-tasks
384 e2e gen add retry attempts for failed tasks
2 parents f59f8e6 + 41c0386 · commit c794101

File tree: 14 files changed, +146 −31 lines

‎README.md

+2 −2

@@ -75,7 +75,7 @@ Kaizen is an open-source AI-powered suite that revolutionizes your code quality
 
 **Mac/Linux**
 ```bash
-PYTHONPATH=. poetry run python examples/basic/generate.py
+PYTHONPATH=. poetry run python examples/e2e_test/generate.py
 ```
 
 **Windows**
@@ -89,7 +89,7 @@ Kaizen is an open-source AI-powered suite that revolutionizes your code quality
 
 **Mac/Linux**
 ```bash
-PYTHONPATH=. poetry run python examples/basic/execute.py
+PYTHONPATH=. poetry run python examples/e2e_test/execute.py
 ```
 
 **Windows**

‎cli/kaizen_cli/cli.py

+3 −3

@@ -1,7 +1,7 @@
 import click
 import os
 import json
-from kaizen.generator.ui import UITestGenerator
+from kaizen.generator.e2e_tests import E2ETestGenerator
 from kaizen.generator.unit_test import UnitTestGenerator
 
 CONFIG_FILE = os.path.expanduser("~/.myapp_config.json")
@@ -82,7 +82,7 @@ def run(obj, command, region):
 @click.argument("url", required=True)
 def ui_tests(url):
     """Run ui test generation"""
-    UITestGenerator().generate_ui_tests(url)
+    E2ETestGenerator().generate_e2e_tests(url)
 
 
 @cli.command()
@@ -104,7 +104,7 @@ def reviewer():
 @click.argument("branch", required=True)
 def work(url):
     """Run ui test generation"""
-    UITestGenerator().generate_ui_tests(url)
+    E2ETestGenerator().generate_e2e_tests(url)
 
 
 if __name__ == "__main__":

‎cli/pyproject.toml

+1 −1

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "kaizen-cli"
-version = "0.1.4"
+version = "0.1.5"
 description = ""
 authors = ["Saurav Panda <sgp65@cornell.edu>"]
 readme = "README.md"

‎docs/pages/features/e2e_testing.mdx

+1 −1

@@ -27,7 +27,7 @@ The E2E UI Testing feature is designed to streamline the process of creating and
 
 5. **Continuous Integration**: As the web application evolves, the feature can regenerate or update the test scripts to ensure they remain aligned with the latest changes and requirements.
 
-You can find an example [here](https://github.com/Cloud-Code-AI/kaizen/tree/main/examples/basic)
+You can find an example [here](https://github.com/Cloud-Code-AI/kaizen/tree/main/examples/e2e_test)
 
 ### Benefits
 
File renamed without changes.
File renamed without changes.

‎examples/basic/generate.py ‎examples/e2e_test/generate.py

+8 −3

@@ -1,18 +1,20 @@
-from kaizen.generator.ui import UITestGenerator
+from kaizen.generator.e2e_tests import E2ETestGenerator
 import time
 import sys
+import traceback
 
-generator = UITestGenerator()
+generator = E2ETestGenerator()
 
 WEBPAGE_URL = "https://cloudcode.ai"
 
 print(f"Generating UI tests for `{WEBPAGE_URL}`, please wait...")
 start_time = time.time()
 
 try:
-    tests, _ = generator.generate_ui_tests(WEBPAGE_URL)
+    tests, _ = generator.generate_e2e_tests(WEBPAGE_URL)
 except Exception as e:
     print(f"Error: {e}")
+    print(traceback.format_exc())
     sys.exit(1)
 
 end_time = time.time()
@@ -28,3 +30,6 @@
     print(f'Desc: {t["test_description"]}')
     print(f'Code: \n{t["code"]}')
     print("-----------------------------------------------------------")
+
+results = generator.run_tests()
+print(f"Test Execution results: \n {results}")

‎examples/unittest/rust.py

+2 −2

@@ -1,7 +1,7 @@
 from kaizen.generator.unit_test import UnitTestGenerator
 
 generator = UnitTestGenerator()
-code = '''
+code = """
 struct Calculator {
     result: i32,
 }
@@ -37,7 +37,7 @@
     println!("{}", calc.get_result()); // Should print 6
     println!("{}", greet("Alice")); // Should print "Hello, Alice!"
 }
-'''
+"""
 
 test_results = generator.run_tests()
 
‎github_app/main.py

+1 −1

@@ -10,7 +10,7 @@
 from kaizen.utils.config import ConfigData
 import logging
 
-# from cloudcode.generator.ui import UITester
+# from cloudcode.generator.ui import E2ETestGenerator
 
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"

‎kaizen/actors/e2e_test_runner.py

+40 (new file)

@@ -0,0 +1,40 @@
+import asyncio
+import glob
+import json
+import os
+from playwright.async_api import async_playwright
+
+
+class E2ETestRunner:
+    def __init__(self, test_directory="./.kaizen/e2e-tests/"):
+        self.test_directory = test_directory
+
+    def run_tests(self):
+        """
+        This method runs playwright tests and updates logs and status accordingly.
+        """
+
+        async def run_test(test):
+            async with async_playwright() as p:
+                browser = await p.chromium.launch()
+                page = await browser.new_page()
+                try:
+                    await page.goto(test["url"])
+                    await page.evaluate(test["code"])
+                    test["status"] = "Passed"
+                except Exception as e:
+                    test["status"] = "Failed"
+                    test["error"] = str(e)
+                finally:
+                    await browser.close()
+
+        tests_dir = self.test_directory
+        tests = []
+        for test_file in glob.glob(os.path.join(tests_dir, "*.json")):
+            with open(test_file, "r") as f:
+                tests.extend(json.load(f))
+
+        loop = asyncio.get_event_loop()
+        tasks = [loop.create_task(run_test(test)) for test in tests]
+        loop.run_until_complete(asyncio.gather(*tasks))
+        return tests
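
The new `E2ETestRunner` collects every `*.json` file in its test directory, runs each test's Playwright snippet against its `url`, and annotates each test dict with `status` (and `error` on failure). A minimal usage sketch follows; the file name and test payload are illustrative assumptions, not part of this commit, and Playwright's Chromium browser must already be installed (`playwright install chromium`):

```python
# Illustrative sketch only: the test file name and its contents are assumed,
# not taken from this commit.
import json
import os

from kaizen.actors.e2e_test_runner import E2ETestRunner

os.makedirs("./.kaizen/e2e-tests/", exist_ok=True)

# A hypothetical generated test file matching the shape the runner expects:
# a list of dicts with at least "url" and "code" (a JS snippet for page.evaluate).
sample_tests = [
    {
        "test_name": "homepage_has_title",
        "url": "https://cloudcode.ai",
        "code": "() => { if (!document.title) { throw new Error('missing title'); } }",
    }
]
with open("./.kaizen/e2e-tests/homepage.json", "w") as f:
    json.dump(sample_tests, f)

runner = E2ETestRunner()  # defaults to ./.kaizen/e2e-tests/
for test in runner.run_tests():
    print(test["test_name"], test["status"], test.get("error", ""))
```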

‎kaizen/generator/ui.py ‎kaizen/generator/e2e_tests.py

+25 −15

@@ -1,21 +1,23 @@
 import logging
 import os
 from typing import Optional
-from kaizen.helpers import output, parser
+from kaizen.helpers import output
 from kaizen.llms.provider import LLMProvider
+from kaizen.actors.e2e_test_runner import E2ETestRunner
 from kaizen.llms.prompts.ui_tests_prompts import (
-    UI_MODULES_PROMPT,
-    UI_TESTS_SYSTEM_PROMPT,
+    E2E_MODULES_PROMPT,
+    E2E_TESTS_SYSTEM_PROMPT,
     PLAYWRIGHT_CODE_PROMPT,
     PLAYWRIGHT_CODE_PLAN_PROMPT,
 )
 
 
-class UITestGenerator:
+class E2ETestGenerator:
     def __init__(self):
         self.logger = logging.getLogger(__name__)
-        self.provider = LLMProvider(system_prompt=UI_TESTS_SYSTEM_PROMPT)
+        self.provider = LLMProvider(system_prompt=E2E_TESTS_SYSTEM_PROMPT)
         self.custom_model = None
+        self.test_folder_path = ".kaizen/e2e-tests"
         self.total_usage = {
             "prompt_tokens": 0,
             "completion_tokens": 0,
@@ -26,13 +28,13 @@ def __init__(self):
         if "type" in self.custom_model:
             del self.custom_model["type"]
 
-    def generate_ui_tests(
+    def generate_e2e_tests(
         self,
         web_url: str,
         folder_path: Optional[str] = "",
     ):
         """
-        This method generates UI tests with cypress code for a given web URL.
+        This method generates e2e tests with cypress code for a given web URL.
         """
         web_content = self.extract_webpage(web_url)
         test_modules = self.identify_modules(web_content)
@@ -49,17 +51,19 @@ def extract_webpage(self, web_url: str):
         """
 
         html = output.get_web_html(web_url)
+        self.logger.info(f"Extracted HTML data for {web_url}")
         return html
 
     def identify_modules(self, web_content: str, user: Optional[str] = None):
         """
         This method identifies the different UI modules from a webpage.
         """
-        prompt = UI_MODULES_PROMPT.format(WEB_CONTENT=web_content)
-        resp, usage = self.provider.chat_completion(
+        prompt = E2E_MODULES_PROMPT.format(WEB_CONTENT=web_content)
+        resp, usage = self.provider.chat_completion_with_json(
             prompt, user=user, custom_model=self.custom_model
         )
-        modules = parser.extract_multi_json(resp)
+        modules = resp["tests"]
+        self.logger.info(f"Extracted modules")
         return {"modules": modules, "usage": usage}
 
     def generate_playwright_code(
@@ -70,21 +74,21 @@ def generate_playwright_code(
         user: Optional[str] = None,
     ):
         """
-        This method generates playwright code for a particular UI test.
+        This method generates playwright code for a particular E2E test.
         """
         code_gen_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
         # First generate a plan for code
         prompt = PLAYWRIGHT_CODE_PLAN_PROMPT.format(
             WEB_CONTENT=web_content, TEST_DESCRIPTION=test_description, URL=web_url
         )
-        plan, usage = self.provider.chat_completion(
+        plan, usage = self.provider.chat_completion_with_retry(
             prompt, user=user, custom_model=self.custom_model
         )
         code_gen_usage = self.provider.update_usage(code_gen_usage, usage)
 
         # Next generate the code based on plan
         code_prompt = PLAYWRIGHT_CODE_PROMPT.format(PLAN_TEXT=plan)
-        code, usage = self.provider.chat_completion(
+        code, usage = self.provider.chat_completion_with_retry(
             code_prompt, user=user, custom_model=self.custom_model
         )
         code_gen_usage = self.provider.update_usage(code_gen_usage, usage)
@@ -103,6 +107,9 @@ def generate_module_tests(self, web_content: str, test_modules: dict, web_url: s
         }
         for module in ui_tests:
             for test in module["tests"]:
+                self.logger.info(
+                    f"Generating playwright code for {test['test_description']}"
+                )
                 test_description = test["test_description"]
                 playwright_code = self.generate_playwright_code(
                     web_content, test_description, web_url
@@ -120,12 +127,15 @@ def store_tests_files(self, json_tests: list, folder_path: str = ""):
         if not folder_path:
             folder_path = output.get_parent_folder()
 
-        folder_path = os.path.join(folder_path, ".kaizen/ui-tests")
+        folder_path = os.path.join(folder_path, self.test_folder_path)
         output.create_folder(folder_path)
         output.create_test_files(json_tests, folder_path)
+        self.logger.info("Successfully store the files")
 
     def run_tests(self, ui_tests: dict):
         """
         This method runs playwright tests and updates logs and status accordingly.
         """
-        pass
+        runner = E2ETestRunner()
+        results = runner.run_tests(ui_tests)
+        return results

‎kaizen/helpers/output.py

+23 −1

@@ -60,16 +60,38 @@ def get_web_html(url):
     html = asyncio.run(get_html(url))
     soup = BeautifulSoup(html, "html.parser")
 
+    # Remove SVG elements
     for svg in soup.find_all("svg"):
         svg.decompose()
 
-    # Delete each comment
+    # Remove HTML comments
     for comment in soup.find_all(text=lambda text: isinstance(text, Comment)):
         comment.extract()
 
+    # Remove <style> elements
     for style_block in soup.find_all("style"):
         style_block.decompose()
 
+    # Remove <script> elements
+    for script in soup.find_all("script"):
+        script.decompose()
+
+    # Remove <noscript> elements
+    for noscript in soup.find_all("noscript"):
+        noscript.decompose()
+
+    # Remove <link> elements (typically used for stylesheets)
+    for link in soup.find_all("link"):
+        link.decompose()
+
+    # Remove <meta> elements (typically used for metadata)
+    for meta in soup.find_all("meta"):
+        meta.decompose()
+
+    # Remove <head> element (contains metadata, scripts, and stylesheets)
+    for head in soup.find_all("head"):
+        head.decompose()
+
     pretty_html = soup.prettify()
     return pretty_html
 
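
To see what the expanded cleanup removes, here is a small standalone sketch of the same BeautifulSoup calls applied to an inline HTML snippet (the sample markup is made up for illustration, not taken from the repository):

```python
# Standalone illustration of the cleanup strategy above; the sample HTML is invented.
from bs4 import BeautifulSoup, Comment

html = """
<html>
  <head><meta charset="utf-8"><title>Demo</title><style>body {}</style></head>
  <body>
    <!-- tracking comment -->
    <script>console.log("noise")</script>
    <svg></svg>
    <p>Visible content the tests care about.</p>
  </body>
</html>
"""

soup = BeautifulSoup(html, "html.parser")
for tag_name in ("svg", "style", "script", "noscript", "link", "meta", "head"):
    for tag in soup.find_all(tag_name):
        tag.decompose()  # drop the element and everything inside it
for comment in soup.find_all(text=lambda text: isinstance(text, Comment)):
    comment.extract()  # comments are text nodes, so extract() rather than decompose()

print(soup.prettify())  # only the visible <body> markup remains
```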

‎kaizen/llms/prompts/ui_tests_prompts.py

+22 −2

@@ -1,25 +1,45 @@
-UI_MODULES_PROMPT = """
+E2E_MODULES_PROMPT = """
 Assign yourself as a quality assurance engineer.
 Read this code and design comprehensive tests to test the UI of this HTML.
 Break it down into 5-10 separate modules and identify the possible things to test for each module.
 For each module, also identify which tests should be checked repeatedly (e.g., after every code change, every build, etc.).
 
 Return the output as JSON with the following keys:
+{{"tests": {{
+    "id": "serial number to identify module",
+    "module_title": "title of the identified module",
+    "tests": [
+        {{
+            "id": "serial number for the test case",
+            "test_description": "description of the test case",
+            "test_name": "name of the test case",
+            "repeat": true,
+            "reason": "reason to add this test",
+        }},
+        ...
+    ],
+    "folder_name": "relevant name for the module",
+    "importance": "critical"
+}}
+}}
+
+Details:
 id - serial number to identify module
 module_title - title of the identified module
 tests - JSON containing list of tests steps to carry out for that module with keys:
 id - serial number for the test case
 test_description - description of the test case
 test_name - name of the test case
 repeat - boolean indicating if this test should be checked repeatedly or not
+reason - reason to add this test case
 folder_name - relevant name for the module
 importance - level of importance of this test out of ['critical', 'good_to_have', 'non_essential']
 
 Share the JSON output ONLY. No other text.
 CONTENT: ```{WEB_CONTENT}```
 """
 
-UI_TESTS_SYSTEM_PROMPT = """
+E2E_TESTS_SYSTEM_PROMPT = """
 You are a Quality Assurance AI assistant specializing in writing Playwright test scripts for web applications. Your goal is to create robust and maintainable test scripts that can be integrated into a CI/CD pipeline.
 
 When given requirements or specifications, you should:
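
For illustration, a model response satisfying this schema would parse into a Python structure along these lines; the module and test values are invented examples, and the list-of-modules shape is an assumption based on how `identify_modules` and `generate_module_tests` consume `resp["tests"]`:

```python
# Hypothetical example of a parsed response matching E2E_MODULES_PROMPT's schema;
# the names and descriptions are invented, not output from the repository.
expected_response = {
    "tests": [
        {
            "id": 1,
            "module_title": "Navigation bar",
            "tests": [
                {
                    "id": 1,
                    "test_description": "Clicking the logo returns to the home page",
                    "test_name": "logo_navigates_home",
                    "repeat": True,
                    "reason": "Core navigation path used on every visit",
                }
            ],
            "folder_name": "navigation",
            "importance": "critical",
        }
    ]
}
```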

‎kaizen/llms/provider.py

+18

@@ -161,6 +161,24 @@ def chat_completion_with_json(
         response = extract_json(response)
         return response, usage
 
+    @retry(max_attempts=3, delay=1)
+    def chat_completion_with_retry(
+        self,
+        prompt,
+        user: str = None,
+        model="default",
+        custom_model=None,
+        messages=None,
+    ):
+        response, usage = self.chat_completion(
+            prompt=prompt,
+            user=user,
+            model=model,
+            custom_model=custom_model,
+            messages=messages,
+        )
+        return response, usage
+
     def is_inside_token_limit(self, PROMPT: str, percentage: float = 0.8) -> bool:
         # Include system prompt in token calculation
         messages = [
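
The `@retry(max_attempts=3, delay=1)` decorator itself is not part of this diff, so its exact behavior is not shown here. A decorator with that signature could look roughly like the sketch below, given as an assumption for illustration rather than the repository's implementation:

```python
# Hypothetical sketch of a retry decorator with the signature used above;
# this is NOT the repository's implementation, just one plausible behavior.
import functools
import time


def retry(max_attempts=3, delay=1):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            last_error = None
            for attempt in range(1, max_attempts + 1):
                try:
                    return func(*args, **kwargs)
                except Exception as e:  # retry on any failure from the wrapped call
                    last_error = e
                    if attempt < max_attempts:
                        time.sleep(delay)  # wait before the next attempt
            raise last_error
        return wrapper
    return decorator
```

With a decorator along these lines, `generate_playwright_code` would transparently retry failed plan and code generation calls up to three times, which matches the intent of PR #385.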
