diff --git a/.vscode/settings.json b/.vscode/settings.json index 57ecd38b4d..4f571f6343 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -5,6 +5,7 @@ "Agentic", "AICI", "ANYJS", + "Apim", "arrayify", "Automatable", "bitindex", @@ -79,9 +80,11 @@ "structurify", "sysr", "tabletojson", + "TAVILY", "Textify", "treegrid", "treesitter", + "tvly", "typecheck", "unfence", "urllib", @@ -115,5 +118,24 @@ "mdx": { "parser": "markdown" } + }, + "workbench.colorCustomizations": { + "activityBar.activeBackground": "#443c00", + "activityBar.background": "#443c00", + "activityBar.foreground": "#e7e7e7", + "activityBar.inactiveForeground": "#e7e7e799", + "activityBarBadge.background": "#008071", + "activityBarBadge.foreground": "#e7e7e7", + "commandCenter.border": "#e7e7e799", + "sash.hoverBorder": "#443c00", + "statusBar.background": "#110f00", + "statusBar.foreground": "#e7e7e7", + "statusBarItem.hoverBackground": "#443c00", + "statusBarItem.remoteBackground": "#110f00", + "statusBarItem.remoteForeground": "#e7e7e7", + "titleBar.activeBackground": "#110f00", + "titleBar.activeForeground": "#e7e7e7", + "titleBar.inactiveBackground": "#110f0099", + "titleBar.inactiveForeground": "#e7e7e799" } } diff --git a/docs/src/components/BuiltinAgents.mdx b/docs/src/components/BuiltinAgents.mdx index 8bedd1a505..590d5a5124 100644 --- a/docs/src/components/BuiltinAgents.mdx +++ b/docs/src/components/BuiltinAgents.mdx @@ -13,3 +13,4 @@ import { LinkCard } from '@astrojs/starlight/components'; + diff --git a/docs/src/components/BuiltinTools.mdx b/docs/src/components/BuiltinTools.mdx index e0df464b06..fe46013fa2 100644 --- a/docs/src/components/BuiltinTools.mdx +++ b/docs/src/components/BuiltinTools.mdx @@ -41,7 +41,7 @@ import { LinkCard } from '@astrojs/starlight/components'; - + diff --git a/docs/src/content/docs/index.mdx b/docs/src/content/docs/index.mdx index 67ad5b664e..1a6387f1c9 100644 --- a/docs/src/content/docs/index.mdx +++ b/docs/src/content/docs/index.mdx @@ -258,7 
+258,7 @@ The quick brown fox jumps over the lazy dog. -Grep or fuzz search [files](/genaiscript/referen/script/files) +Grep or fuzz search [files](/genaiscript/reference/scripts/files) ```js wrap const { files } = await workspace.grep(/[a-z][a-z0-9]+/, { globs: "*.md" }) @@ -266,7 +266,17 @@ const { files } = await workspace.grep(/[a-z][a-z0-9]+/, { globs: "*.md" }) - + + +[Web search](/genaiscript/reference/scripts/web-search) using Bing or Tavily. + +```js wrap +const pages = await retrieval.webSearch("what are the latest news about AI?") +``` + + + + Browse and scrape the web with [Playwright](/genaiscript/reference/scripts/browse). diff --git a/docs/src/content/docs/reference/scripts/system.mdx b/docs/src/content/docs/reference/scripts/system.mdx index bea904b0f7..221d8c0b0e 100644 --- a/docs/src/content/docs/reference/scripts/system.mdx +++ b/docs/src/content/docs/reference/scripts/system.mdx @@ -360,6 +360,35 @@ defAgent( ````` +### `system.agent_web` + +Agent that can search the web. + + + + + +`````js wrap title="system.agent_web" +system({ + title: "Agent that can search the web.", +}) + +const model = env.vars.agentWebSearchModel + +defAgent( + "web-search", + "search the web to accomplish tasks.", + `Your are a helpful LLM agent that can use web search. + Answer the question in QUERY.`, + { + model, + system: ["system.retrieval_fuzz_search", "system.retrieval_web_search"], + } +) + +````` + + ### `system.annotations` Emits annotations compatible with GitHub Actions @@ -2788,18 +2817,17 @@ Web Search Function to do a web search. -- tool `retrieval_web_search`: Search the web for a user query using Bing Search. +- tool `retrieval_web_search`: Search the web for a user query using Tavily or Bing Search. 
`````js wrap title="system.retrieval_web_search" system({ title: "Web Search", description: "Function to do a web search.", - secrets: ["BING_SEARCH_ENDPOINT"], }) defTool( "retrieval_web_search", - "Search the web for a user query using Bing Search.", + "Search the web for a user query using Tavily or Bing Search.", { type: "object", properties: { @@ -2807,16 +2835,20 @@ defTool( type: "string", description: "Search query.", }, + count: { + type: "integer", + description: "Number of results to return.", + }, }, required: ["query"], }, async (args) => { - const { query } = args - const webPages = await retrieval.webSearch(query) + const { query, count } = args + const webPages = await retrieval.webSearch(query, { count }) return YAML.stringify( webPages.map((f) => ({ url: f.filename, - snippet: f.content, + content: f.content, })) ) } diff --git a/docs/src/content/docs/reference/scripts/web-search.md b/docs/src/content/docs/reference/scripts/web-search.mdx similarity index 69% rename from docs/src/content/docs/reference/scripts/web-search.md rename to docs/src/content/docs/reference/scripts/web-search.mdx index f36c2be912..2088d8f8ba 100644 --- a/docs/src/content/docs/reference/scripts/web-search.md +++ b/docs/src/content/docs/reference/scripts/web-search.mdx @@ -1,12 +1,12 @@ --- title: Web Search description: Execute web searches with the Bing API using retrieval.webSearch in scripts. -keywords: web search, Bing API, search automation, API configuration, search function +keywords: web search, Bing API, Tavily, search automation, API configuration, search function sidebar: order: 15 --- -The `retrieval.webSearch` executes a web search using the Bing Web Search API. +The `retrieval.webSearch` executes a web search using [Tavily](https://docs.tavily.com/) or the Bing Web Search. ## Web Pages @@ -21,7 +21,16 @@ def("PAGES", webPages) You can use `fetchText` to download the full content of the web page. 
-## Bing Web Search configuration +## Tavily Configuration + +The [Tavily API](https://docs.tavily.com/docs/rest-api/api-reference#endpoint-post-search) +provides access to a powerful search engine for LLM agents. + +```txt title=".env" +TAVILY_API_KEY="your-api-key" +``` + +## Bing Web Search configuration The API uses [Bing Web Search v7](https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/overview) to search the web. To use the API, you need to create a Bing Web Search resource in the Azure portal and store the API key in the `.env` file. diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts index 7fa822b074..262d8f4cb2 100644 --- a/packages/core/src/constants.ts +++ b/packages/core/src/constants.ts @@ -87,7 +87,10 @@ export const TRACE_NODE_PREFIX = "genaiscript/trace/" export const EXTENSION_ID = "genaiscript.genaiscript-vscode" export const COPILOT_CHAT_PARTICIPANT_ID = TOOL_ID export const COPILOT_CHAT_PARTICIPANT_SCRIPT_ID = "copilotchat" + export const BING_SEARCH_ENDPOINT = "https://api.bing.microsoft.com/v7.0/search" +export const TAVILY_ENDPOINT = "https://api.tavily.com/search" + export const SYSTEM_FENCE = "\n" export const MAX_DATA_REPAIRS = 1 export const NPM_CLI_PACKAGE = "genaiscript" @@ -196,6 +199,11 @@ export const DOCS_CONFIGURATION_CONTENT_SAFETY_URL = "https://microsoft.github.io/genaiscript/reference/scripts/content-safety" export const DOCS_DEF_FILES_IS_EMPTY_URL = "https://microsoft.github.io/genaiscript/reference/scripts/context/#empty-files" +export const DOCS_WEB_SEARCH_URL = + "https://microsoft.github.io/genaiscript/reference/scripts/web-search/" +export const DOCS_WEB_SEARCH_BING_SEARCH_URL = + "https://microsoft.github.io/genaiscript/reference/scripts/web-search/#bingn" +export const DOCS_WEB_SEARCH_TAVILY_URL = "https://microsoft.github.io/genaiscript/reference/scripts/web-search/#tavily" export const MODEL_PROVIDERS = Object.freeze([ { diff --git 
a/packages/core/src/genaisrc/system.agent_web.genai.mjs b/packages/core/src/genaisrc/system.agent_web.genai.mjs new file mode 100644 index 0000000000..3dd1d2b617 --- /dev/null +++ b/packages/core/src/genaisrc/system.agent_web.genai.mjs @@ -0,0 +1,16 @@ +system({ + title: "Agent that can search the web.", +}) + +const model = env.vars.agentWebSearchModel + +defAgent( + "web-search", + "search the web to accomplish tasks.", + `Your are a helpful LLM agent that can use web search. + Answer the question in QUERY.`, + { + model, + system: ["system.retrieval_fuzz_search", "system.retrieval_web_search"], + } +) diff --git a/packages/core/src/genaisrc/system.retrieval_web_search.genai.js b/packages/core/src/genaisrc/system.retrieval_web_search.genai.js index 15f8cea9d7..7b2d313c9c 100644 --- a/packages/core/src/genaisrc/system.retrieval_web_search.genai.js +++ b/packages/core/src/genaisrc/system.retrieval_web_search.genai.js @@ -1,12 +1,11 @@ system({ title: "Web Search", description: "Function to do a web search.", - secrets: ["BING_SEARCH_ENDPOINT"], }) defTool( "retrieval_web_search", - "Search the web for a user query using Bing Search.", + "Search the web for a user query using Tavily or Bing Search.", { type: "object", properties: { @@ -14,16 +13,20 @@ defTool( type: "string", description: "Search query.", }, + count: { + type: "integer", + description: "Number of results to return.", + }, }, required: ["query"], }, async (args) => { - const { query } = args - const webPages = await retrieval.webSearch(query) + const { query, count } = args + const webPages = await retrieval.webSearch(query, { count }) return YAML.stringify( webPages.map((f) => ({ url: f.filename, - snippet: f.content, + content: f.content, })) ) } diff --git a/packages/core/src/promptcontext.ts b/packages/core/src/promptcontext.ts index 9d465288fd..a8a8123913 100644 --- a/packages/core/src/promptcontext.ts +++ b/packages/core/src/promptcontext.ts @@ -7,7 +7,7 @@ import { arrayify, dotGenaiscriptPath 
} from "./util" import { runtimeHost } from "./host" import { MarkdownTrace } from "./trace" import { createParsers } from "./parsers" -import { bingSearch } from "./websearch" +import { bingSearch, tavilySearch } from "./websearch" import { RunPromptContextNode, createChatGenerationContext, @@ -27,6 +27,7 @@ import { HTMLEscape } from "./html" import { hash } from "./crypto" import { resolveModelConnectionInfo } from "./models" import { createAzureContentSafetyClient } from "./azurecontentsafety" +import { DOCS_WEB_SEARCH_URL } from "./constants" /** * Creates a prompt context for the given project, variables, trace, options, and model. @@ -108,20 +109,27 @@ export async function createPromptContext( // Define retrieval operations const retrieval: Retrieval = { - webSearch: async (q) => { + webSearch: async (q, options) => { + const { provider, count } = options || {} // Conduct a web search and return the results try { trace.startDetails( `🌐 web search ${HTMLEscape(q)}` ) - const { webPages } = (await bingSearch(q, { trace })) || {} - const files = webPages?.value?.map( - ({ url, snippet }) => - { - filename: url, - content: snippet, - } - ) + let files: WorkspaceFile[] + if (provider === "bing") files = await bingSearch(q, { trace, count }) + else if (provider === "tavily") + files = await tavilySearch(q, { trace, count }) + else { + for (const f of [bingSearch, tavilySearch]) { + files = await f(q, { ignoreMissingApiKey: true, trace, count }) + if (files) break + } + } + if (!files) + throw new Error( + `No search provider configured. 
See ${DOCS_WEB_SEARCH_URL}.` + ) trace.files(files, { model, secrets: env.secrets }) return files } finally { diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts index b3b08c30b2..e568618b1c 100644 --- a/packages/core/src/types/prompt_template.d.ts +++ b/packages/core/src/types/prompt_template.d.ts @@ -2122,10 +2122,13 @@ interface FuzzSearchOptions { interface Retrieval { /** - * Executers a Bing web search. Requires to configure the BING_SEARCH_API_KEY secret. + * Executes a web search with Tavily or Bing Search. * @param query */ - webSearch(query: string): Promise + webSearch( + query: string, + options?: { count?: number; provider?: "tavily" | "bing" } + ): Promise /** * Search using similarity distance on embeddings diff --git a/packages/core/src/websearch.ts b/packages/core/src/websearch.ts index 576ee06d06..f75d1f1b32 100644 --- a/packages/core/src/websearch.ts +++ b/packages/core/src/websearch.ts @@ -1,7 +1,13 @@ -import { BING_SEARCH_ENDPOINT } from "./constants" +import { + BING_SEARCH_ENDPOINT, + DOCS_WEB_SEARCH_BING_SEARCH_URL, + DOCS_WEB_SEARCH_TAVILY_URL, + TAVILY_ENDPOINT, +} from "./constants" import { createFetch } from "./fetch" import { runtimeHost } from "./host" -import { MarkdownTrace } from "./trace" +import { MarkdownTrace, TraceOptions } from "./trace" +import { deleteUndefinedValues, logVerbose } from "./util" /** * Converts an object into a URL search parameters string. @@ -19,23 +25,6 @@ function toURLSearchParams(o: any) { return params.toString() } -/** - * Interface representing the response from a search query. - */ -export interface SearchResponse { - webPages?: { - value: WebpageResponse[] - } -} - -/** - * Interface representing a single webpage response. - */ -export interface WebpageResponse { - snippet: string - url: string -} - /** * Performs a Bing search using the given query and options. * Utilizes Bing Search API and constructs the request with query parameters. 
@@ -48,16 +37,17 @@ export interface WebpageResponse { export async function bingSearch( q: string, options?: { - trace?: MarkdownTrace + ignoreMissingApiKey?: boolean endPoint?: string count?: number cc?: string freshness?: string responseFilter?: string safeSearch?: string - } -): Promise { + } & TraceOptions +): Promise { const { + ignoreMissingApiKey, trace, endPoint = BING_SEARCH_ENDPOINT, count, @@ -68,14 +58,17 @@ export async function bingSearch( } = options || {} // Return an empty response if the query is empty. - if (!q) return {} + if (!q) return [] // Retrieve the API key from the runtime host. const apiKey = await runtimeHost.readSecret("BING_SEARCH_API_KEY") - if (!apiKey) + if (!apiKey) { + if (ignoreMissingApiKey) return undefined throw new Error( - "BING_SEARCH_API_KEY secret is required to use bing search. See https://microsoft.github.io/genaiscript/reference/scripts/web-search/#bing-web-search-configuration." + `BING_SEARCH_API_KEY secret is required to use bing search. See ${DOCS_WEB_SEARCH_BING_SEARCH_URL}.`, + { cause: "missing key" } ) + } // Construct the query string using provided and default parameters. const query = toURLSearchParams({ @@ -91,7 +84,7 @@ export async function bingSearch( const url = endPoint + "?" + query // Create a fetch function for making the HTTP request. - const fetch = await createFetch() + const fetch = await createFetch({ trace }) const res = await fetch(url, { method: "GET", headers: { @@ -100,16 +93,107 @@ export async function bingSearch( }) // Log the search response status for tracing purposes. - trace?.itemValue(`Bing search`, res.statusText) + trace?.itemValue(`Bing search`, res.status + " " + res.statusText) // Throw an error if the response is not OK, and log details for debugging. 
if (!res.ok) { trace?.detailsFenced("error response", await res.text()) - throw new Error(`Bing search failed: ${res.statusText}`) + throw new Error(`Bing search failed: ${res.status} ${res.statusText}`) + } + + // Parse and return the JSON response, logging the results. + const json = (await res.json()) as { + webPages?: { + value: { + snippet: string + url: string + }[] + } + } + trace?.detailsFenced("results", json, "yaml") + return ( + json.webPages?.value?.map( + ({ snippet, url }) => + ({ filename: url, content: snippet }) satisfies WorkspaceFile + ) || [] + ) +} + +/** + * Performs a Tavily search using the given query and options. + * Utilizes the Tavily Search API and sends the query as a JSON POST body. + * Handles API key retrieval and error management. + * @param q - The search query string. + * @param options - Optional search parameters such as trace, endpoint, count, etc. + * @returns A Promise resolving to the result pages as WorkspaceFile entries (url as filename), or undefined when the API key is missing and ignoreMissingApiKey is set. + * @throws Error if the API key is missing (and ignoreMissingApiKey is not set) or if the search request fails. + */ +export async function tavilySearch( + q: string, + options?: { + ignoreMissingApiKey?: boolean + endPoint?: string + count?: number + } & TraceOptions +): Promise { + const { + trace, + count, + ignoreMissingApiKey, + endPoint = TAVILY_ENDPOINT, + } = options || {} + + // Return an empty result list if the query is empty. + if (!q) return [] + + // Retrieve the API key from the runtime host. + const apiKey = await runtimeHost.readSecret("TAVILY_API_KEY") + if (!apiKey) { + if (ignoreMissingApiKey) return undefined + throw new Error( + `TAVILY_API_KEY secret is required to use Tavily search. See ${DOCS_WEB_SEARCH_TAVILY_URL}.`, + { cause: "missing key" } + ) + } + + // Construct the request body from the query, API key and optional result count. 
+ const body = deleteUndefinedValues({ + query: q, + api_key: apiKey, + max_results: count, + }) + + // Create a fetch function for making the HTTP request. 
+ const fetch = await createFetch({ trace }) + const res = await fetch(endPoint, { + method: "POST", + headers: { + ["Content-Type"]: "application/json", + Accept: "application/json", + }, + retryOn: [429], + body: JSON.stringify(body), + }) + + // Log the search response status for tracing purposes. + trace?.itemValue(`Tavily search`, res.status + " " + res.statusText) + + // Throw an error if the response is not OK, and log details for debugging. + if (!res.ok) { + const err = await res.text() + trace?.detailsFenced("error response", err) + logVerbose(err) + throw new Error(`Tavily search failed: ${res.status} ${res.statusText}`) } // Parse and return the JSON response, logging the results. - const json = await res.json() + const json: { + query: string + results: { url: string; content: string }[] + } = await res.json() trace?.detailsFenced("results", json, "yaml") - return json + return json.results.map( + ({ url, content }) => + ({ filename: url, content }) satisfies WorkspaceFile + ) } diff --git a/packages/sample/genaisrc/bingsearch.genai.js b/packages/sample/genaisrc/bingsearch.genai.js deleted file mode 100644 index f9c2fb20aa..0000000000 --- a/packages/sample/genaisrc/bingsearch.genai.js +++ /dev/null @@ -1,6 +0,0 @@ -script({ - title: "bing search", -}) - -const webPages = await retrieval.webSearch("microsoft") -def("RES", webPages, { language: "json" }) diff --git a/packages/sample/genaisrc/copilot/dataanalyst.genai.mjs b/packages/sample/genaisrc/copilot/dataanalyst.genai.mjs new file mode 100644 index 0000000000..e0989b1ad5 --- /dev/null +++ b/packages/sample/genaisrc/copilot/dataanalyst.genai.mjs @@ -0,0 +1,3 @@ +def("DATA", env.files) +def("QUESTION", env.vars.question) +$`` \ No newline at end of file diff --git a/packages/sample/genaisrc/copilot/mermaid.genai.mjs b/packages/sample/genaisrc/copilot/mermaid.genai.mjs index ce9af3e7e6..173af6dc1a 100644 --- a/packages/sample/genaisrc/copilot/mermaid.genai.mjs +++ 
b/packages/sample/genaisrc/copilot/mermaid.genai.mjs @@ -1,2 +1,6 @@ +/** + * Inspired by mermAid + * @see https://marketplace.visualstudio.com/items?itemName=ms-vscode.copilot-mermaid-diagram + */ def("CODE", env.files) $`Generate a class diagram using mermaid of the code symbols in the CODE.` \ No newline at end of file diff --git a/packages/sample/genaisrc/websearch.genai.js b/packages/sample/genaisrc/websearch.genai.js new file mode 100644 index 0000000000..c2ab65eec9 --- /dev/null +++ b/packages/sample/genaisrc/websearch.genai.js @@ -0,0 +1,13 @@ +script({ + title: "web search search", +}) + +const webPages = await retrieval.webSearch( + "what are the last nvidia results?", + { + provider: env.vars.provider, + } +) +console.log(webPages) +def("PAGES", webPages) +$`Summarize pages.`