support basic text output in tests (#1113)

* ✨ feat: enhance prompt tests with format and transforms. Introduce 'format' to PromptTest and refactor the transforms logic. * ✨ feat: improve script output handling with new format option. GenAIScript now defaults to extracting `text` but allows `format: "json"`.
microsoft · Feb 8, 2025 · bc06cf3 · bc06cf3
1 parent b5c437f
commit bc06cf3
Show file tree

Hide file tree

Showing 3 changed files with 58 additions and 37 deletions.
diff --git a/docs/src/content/docs/reference/scripts/tests.mdx b/docs/src/content/docs/reference/scripts/tests.mdx
@@ -156,13 +156,15 @@ scripts({
 
 #### transform
 
-By default, the `asserts` are executed on the raw LLM output.
-However, you can use a javascript expression to select a part of the output to test.
+By default, GenAIScript extracts the `text` field from the output before sending it to PromptFoo.
+You can disable this mode by setting `format: "json"`; then the `asserts` are executed on the raw LLM output.
+You can use a JavaScript expression to select a part of the output to test.
 
 ```js title="proofreader.genai.js" wrap "transform"
 scripts({
     tests: {
         files: "src/will-trigger.cancel.txt",
+        format: "json",
         asserts: {
             type: "equals",
             value: "cancelled",

diff --git a/packages/core/src/test.ts b/packages/core/src/test.ts
@@ -11,7 +11,7 @@ import {
 import { arrayify, logWarn } from "./util"
 import { runtimeHost } from "./host"
 import { ModelConnectionInfo, parseModelIdentifier } from "./models"
-import { deleteUndefinedValues } from "./cleaners"
+import { deleteEmptyValues, deleteUndefinedValues } from "./cleaners"
 import testSchema from "../../../docs/public/schemas/tests.json"
 import { validateJSONWithSchema } from "./schema"
 import { TraceOptions } from "./trace"
@@ -175,7 +175,6 @@ export async function generatePromptFooConfiguration(
     }
 
     const cli = options?.cli
-    const transform = "output.text"
 
     const resolveModel = (m: string) => runtimeHost.modelAliases[m]?.model ?? m
 
@@ -186,6 +185,14 @@ export async function generatePromptFooConfiguration(
     const defaultTest = deleteUndefinedValues({
         options: deleteUndefinedValues({ provider: testProvider }),
     })
+    const testTransforms = {
+        text: "output.text",
+        json: undefined as string,
+    }
+    const assertTransforms = {
+        text: undefined as string,
+        json: "output.text",
+    }
 
     // Create configuration object
     const res = {
@@ -238,42 +245,49 @@ export async function generatePromptFooConfiguration(
                 vars,
                 rubrics,
                 facts,
+                format = "text",
                 keywords = [],
                 forbidden = [],
                 asserts = [],
-            }) => ({
-                description,
-                vars: deleteUndefinedValues({
-                    files,
-                    vars,
-                }),
-                assert: [
-                    ...arrayify(keywords).map((kv) => ({
-                        type: "icontains", // Check if output contains keyword
-                        value: kv,
-                        transform,
-                    })),
-                    ...arrayify(forbidden).map((kv) => ({
-                        type: "not-icontains", // Check if output does not contain forbidden keyword
-                        value: kv,
-                        transform,
-                    })),
-                    ...arrayify(rubrics).map((value) => ({
-                        type: "llm-rubric", // Use LLM rubric for evaluation
-                        value,
-                        transform,
-                    })),
-                    ...arrayify(facts).map((value) => ({
-                        type: "factuality", // Check factuality of output
-                        value,
-                        transform,
-                    })),
-                    ...arrayify(asserts).map((assert) => ({
-                        ...assert,
-                        transform: assert.transform || transform, // Default transform
-                    })),
-                ].filter((a) => !!a), // Filter out any undefined assertions
-            })
+            }) =>
+                deleteEmptyValues({
+                    description,
+                    vars: deleteEmptyValues({
+                        files,
+                        workspaceFiles,
+                        vars,
+                    }),
+                    options: {
+                        transform: testTransforms[format],
+                    },
+                    assert: [
+                        ...arrayify(keywords).map((kv) => ({
+                            type: "icontains", // Check if output contains keyword
+                            value: kv,
+                            transform: assertTransforms[format],
+                        })),
+                        ...arrayify(forbidden).map((kv) => ({
+                            type: "not-icontains", // Check if output does not contain forbidden keyword
+                            value: kv,
+                            transform: assertTransforms[format],
+                        })),
+                        ...arrayify(rubrics).map((value) => ({
+                            type: "llm-rubric", // Use LLM rubric for evaluation
+                            value,
+                            transform: assertTransforms[format],
+                        })),
+                        ...arrayify(facts).map((value) => ({
+                            type: "factuality", // Check factuality of output
+                            value,
+                            transform: assertTransforms[format],
+                        })),
+                        ...arrayify(asserts).map((assert) => ({
+                            ...assert,
+                            transform:
+                                assert.transform || assertTransforms[format], // Default transform
+                        })),
+                    ].filter((a) => !!a), // Filter out any undefined assertions
+                })
         ),
     }
 

diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts
@@ -507,6 +507,11 @@ interface PromptTest {
      * Additional deterministic assertions.
      */
     asserts?: PromptAssertion | PromptAssertion[]
+
+    /**
+     * Determines what kind of output is sent back to the test engine. Default is "text".
+     */
+    format?: "text" | "json"
 }
 
 interface ContentSafetyOptions {