Skip to content

Commit

Permalink
Integrate Tavily search API into web search functionality (#860)
Browse files Browse the repository at this point in the history
  • Loading branch information
pelikhan authored Nov 14, 2024
1 parent 5df73e0 commit 6b66593
Show file tree
Hide file tree
Showing 16 changed files with 275 additions and 65 deletions.
22 changes: 22 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"Agentic",
"AICI",
"ANYJS",
"Apim",
"arrayify",
"Automatable",
"bitindex",
Expand Down Expand Up @@ -79,9 +80,11 @@
"structurify",
"sysr",
"tabletojson",
"TAVILY",
"Textify",
"treegrid",
"treesitter",
"tvly",
"typecheck",
"unfence",
"urllib",
Expand Down Expand Up @@ -115,5 +118,24 @@
"mdx": {
"parser": "markdown"
}
},
"workbench.colorCustomizations": {
"activityBar.activeBackground": "#443c00",
"activityBar.background": "#443c00",
"activityBar.foreground": "#e7e7e7",
"activityBar.inactiveForeground": "#e7e7e799",
"activityBarBadge.background": "#008071",
"activityBarBadge.foreground": "#e7e7e7",
"commandCenter.border": "#e7e7e799",
"sash.hoverBorder": "#443c00",
"statusBar.background": "#110f00",
"statusBar.foreground": "#e7e7e7",
"statusBarItem.hoverBackground": "#443c00",
"statusBarItem.remoteBackground": "#110f00",
"statusBarItem.remoteForeground": "#e7e7e7",
"titleBar.activeBackground": "#110f00",
"titleBar.activeForeground": "#e7e7e7",
"titleBar.inactiveBackground": "#110f0099",
"titleBar.inactiveForeground": "#e7e7e799"
}
}
1 change: 1 addition & 0 deletions docs/src/components/BuiltinAgents.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ import { LinkCard } from '@astrojs/starlight/components';
<LinkCard title="agent interpreter" description="run code interpreters for Python, Math. Use this agent to ground computation questions." href="/genaiscript/reference/scripts/system#systemagent_interpreter" />
<LinkCard title="agent planner" description="generates a plan to solve a task" href="/genaiscript/reference/scripts/system#systemagent_planner" />
<LinkCard title="agent user_input" description="ask user for input to confirm, select or answer the question in the query. The message should be very clear and provide all the context." href="/genaiscript/reference/scripts/system#systemagent_user_input" />
<LinkCard title="agent web-search" description="search the web to accomplish tasks." href="/genaiscript/reference/scripts/system#systemagent_web" />
2 changes: 1 addition & 1 deletion docs/src/components/BuiltinTools.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ import { LinkCard } from '@astrojs/starlight/components';
<LinkCard title="python_code_interpreter_read_file" description="Reads a file from the container file system. No absolute paths." href="/genaiscript/reference/scripts/system#systempython_code_interpreter" />
<LinkCard title="retrieval_fuzz_search" description="Search for keywords using the full text of files and a fuzzy distance." href="/genaiscript/reference/scripts/system#systemretrieval_fuzz_search" />
<LinkCard title="retrieval_vector_search" description="Search files using embeddings and similarity distance." href="/genaiscript/reference/scripts/system#systemretrieval_vector_search" />
<LinkCard title="retrieval_web_search" description="Search the web for a user query using Bing Search." href="/genaiscript/reference/scripts/system#systemretrieval_web_search" />
<LinkCard title="retrieval_web_search" description="Search the web for a user query using Tavily or Bing Search." href="/genaiscript/reference/scripts/system#systemretrieval_web_search" />
<LinkCard title="user_input_confirm" description="Ask the user to confirm a message." href="/genaiscript/reference/scripts/system#systemuser_input" />
<LinkCard title="user_input_select" description="Ask the user to select an option." href="/genaiscript/reference/scripts/system#systemuser_input" />
<LinkCard title="user_input_text" description="Ask the user to input text." href="/genaiscript/reference/scripts/system#systemuser_input" />
Expand Down
14 changes: 12 additions & 2 deletions docs/src/content/docs/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -258,15 +258,25 @@ The quick brown fox jumps over the lazy dog.

<Card title="File search" icon="document">

Grep or fuzz search [files](/genaiscript/referen/script/files)
Grep or fuzz search [files](/genaiscript/reference/script/files)

```js wrap
const { files } = await workspace.grep(/[a-z][a-z0-9]+/, { globs: "*.md" })
```

</Card>

<Card title="Browser automation" icon="document">
<Card title="Web search" icon="magnifier">

[Web search](/genaiscript/reference/scripts/web-search) using Bing or Tavily.

```js wrap
const pages = await retrieval.webSearch("what are the latest news about AI?")
```

</Card>

<Card title="Browser automation" icon="approve-check-circle">

Browse and scrape the web with [Playwright](/genaiscript/reference/scripts/browse).

Expand Down
44 changes: 38 additions & 6 deletions docs/src/content/docs/reference/scripts/system.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,35 @@ defAgent(
`````


### `system.agent_web`

Agent that can search the web.





`````js wrap title="system.agent_web"
system({
title: "Agent that can search the web.",
})

const model = env.vars.agentWebSearchModel

defAgent(
"web-search",
"search the web to accomplish tasks.",
`You are a helpful LLM agent that can use web search.
Answer the question in QUERY.`,
{
model,
system: ["system.retrieval_fuzz_search", "system.retrieval_web_search"],
}
)

`````


### `system.annotations`

Emits annotations compatible with GitHub Actions
Expand Down Expand Up @@ -2788,35 +2817,38 @@ Web Search
Function to do a web search.
- tool `retrieval_web_search`: Search the web for a user query using Bing Search.
- tool `retrieval_web_search`: Search the web for a user query using Tavily or Bing Search.
`````js wrap title="system.retrieval_web_search"
system({
title: "Web Search",
description: "Function to do a web search.",
secrets: ["BING_SEARCH_ENDPOINT"],
})
defTool(
"retrieval_web_search",
"Search the web for a user query using Bing Search.",
"Search the web for a user query using Tavily or Bing Search.",
{
type: "object",
properties: {
query: {
type: "string",
description: "Search query.",
},
count: {
type: "integer",
description: "Number of results to return.",
},
},
required: ["query"],
},
async (args) => {
const { query } = args
const webPages = await retrieval.webSearch(query)
const { query, count } = args
const webPages = await retrieval.webSearch(query, { count })
return YAML.stringify(
webPages.map((f) => ({
url: f.filename,
snippet: f.content,
content: f.content,
}))
)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
---
title: Web Search
description: Execute web searches with Tavily or the Bing API using retrieval.webSearch in scripts.
keywords: web search, Bing API, search automation, API configuration, search function
keywords: web search, Bing API, Tavily, search automation, API configuration, search function
sidebar:
order: 15
---

The `retrieval.webSearch` executes a web search using the Bing Web Search API.
The `retrieval.webSearch` function executes a web search using [Tavily](https://docs.tavily.com/) or the Bing Web Search API.

## Web Pages

Expand All @@ -21,7 +21,16 @@ def("PAGES", webPages)

You can use `fetchText` to download the full content of the web page.

## Bing Web Search configuration
## Tavily Configuration <a href="" id="tavily" />

The [Tavily API](https://docs.tavily.com/docs/rest-api/api-reference#endpoint-post-search)
provides access to a powerful search engine for LLM agents.

```txt title=".env"
TAVILY_API_KEY="your-api-key"
```

## Bing Web Search configuration <a href="" id="bing" />

The API uses [Bing Web Search v7](https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/overview) to search the web. To use the API, you need to create a Bing Web Search resource in the Azure portal and store the API key in the `.env` file.

Expand Down
8 changes: 8 additions & 0 deletions packages/core/src/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,10 @@ export const TRACE_NODE_PREFIX = "genaiscript/trace/"
export const EXTENSION_ID = "genaiscript.genaiscript-vscode"
export const COPILOT_CHAT_PARTICIPANT_ID = TOOL_ID
export const COPILOT_CHAT_PARTICIPANT_SCRIPT_ID = "copilotchat"

// Bing Web Search v7.0 search endpoint URL.
export const BING_SEARCH_ENDPOINT = "https://api.bing.microsoft.com/v7.0/search"
// Tavily search endpoint URL.
export const TAVILY_ENDPOINT = "https://api.tavily.com/search"

export const SYSTEM_FENCE = "\n"
export const MAX_DATA_REPAIRS = 1
export const NPM_CLI_PACKAGE = "genaiscript"
Expand Down Expand Up @@ -196,6 +199,11 @@ export const DOCS_CONFIGURATION_CONTENT_SAFETY_URL =
"https://microsoft.github.io/genaiscript/reference/scripts/content-safety"
export const DOCS_DEF_FILES_IS_EMPTY_URL =
"https://microsoft.github.io/genaiscript/reference/scripts/context/#empty-files"
/** Documentation page describing the web search feature. */
export const DOCS_WEB_SEARCH_URL =
    "https://microsoft.github.io/genaiscript/reference/scripts/web-search/"
/**
 * Link to the Bing configuration section of the web search documentation.
 * The fragment must match the `id="bing"` anchor declared on that page.
 */
export const DOCS_WEB_SEARCH_BING_SEARCH_URL =
    "https://microsoft.github.io/genaiscript/reference/scripts/web-search/#bing"
/**
 * Link to the Tavily configuration section of the web search documentation.
 * The fragment must match the `id="tavily"` anchor declared on that page.
 */
export const DOCS_WEB_SEARCH_TAVILY_URL =
    "https://microsoft.github.io/genaiscript/reference/scripts/web-search/#tavily"

export const MODEL_PROVIDERS = Object.freeze([
{
Expand Down
16 changes: 16 additions & 0 deletions packages/core/src/genaisrc/system.agent_web.genai.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// System script that registers the "web-search" agent.
system({
    title: "Agent that can search the web.",
})

// Model for the agent, read from the `agentWebSearchModel` script variable.
const model = env.vars.agentWebSearchModel

// Register the agent: name, description shown to the caller, the agent's
// instructions, and the system scripts (search tools) made available to it.
defAgent(
    "web-search",
    "search the web to accomplish tasks.",
    `You are a helpful LLM agent that can use web search.
Answer the question in QUERY.`,
    {
        model,
        system: ["system.retrieval_fuzz_search", "system.retrieval_web_search"],
    }
)
13 changes: 8 additions & 5 deletions packages/core/src/genaisrc/system.retrieval_web_search.genai.js
Original file line number Diff line number Diff line change
@@ -1,29 +1,32 @@
system({
title: "Web Search",
description: "Function to do a web search.",
secrets: ["BING_SEARCH_ENDPOINT"],
})

defTool(
"retrieval_web_search",
"Search the web for a user query using Bing Search.",
"Search the web for a user query using Tavily or Bing Search.",
{
type: "object",
properties: {
query: {
type: "string",
description: "Search query.",
},
count: {
type: "integer",
description: "Number of results to return.",
},
},
required: ["query"],
},
async (args) => {
const { query } = args
const webPages = await retrieval.webSearch(query)
const { query, count } = args
const webPages = await retrieval.webSearch(query, { count })
return YAML.stringify(
webPages.map((f) => ({
url: f.filename,
snippet: f.content,
content: f.content,
}))
)
}
Expand Down
28 changes: 18 additions & 10 deletions packages/core/src/promptcontext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import { arrayify, dotGenaiscriptPath } from "./util"
import { runtimeHost } from "./host"
import { MarkdownTrace } from "./trace"
import { createParsers } from "./parsers"
import { bingSearch } from "./websearch"
import { bingSearch, tavilySearch } from "./websearch"
import {
RunPromptContextNode,
createChatGenerationContext,
Expand All @@ -27,6 +27,7 @@ import { HTMLEscape } from "./html"
import { hash } from "./crypto"
import { resolveModelConnectionInfo } from "./models"
import { createAzureContentSafetyClient } from "./azurecontentsafety"
import { DOCS_WEB_SEARCH_URL } from "./constants"

/**
* Creates a prompt context for the given project, variables, trace, options, and model.
Expand Down Expand Up @@ -108,20 +109,27 @@ export async function createPromptContext(

// Define retrieval operations
const retrieval: Retrieval = {
webSearch: async (q) => {
webSearch: async (q, options) => {
const { provider, count } = options || {}
// Conduct a web search and return the results
try {
trace.startDetails(
`🌐 web search <code>${HTMLEscape(q)}</code>`
)
const { webPages } = (await bingSearch(q, { trace })) || {}
const files = webPages?.value?.map(
({ url, snippet }) =>
<WorkspaceFile>{
filename: url,
content: snippet,
}
)
let files: WorkspaceFile[]
if (provider === "bing") files = await bingSearch(q, { trace, count })
else if (provider === "tavily")
files = await tavilySearch(q, { trace, count })
else {
for (const f of [bingSearch, tavilySearch]) {
files = await f(q, { ignoreMissingApiKey: true, trace, count })
if (files) break
}
}
if (!files)
throw new Error(
`No search provider configured. See ${DOCS_WEB_SEARCH_URL}.`
)
trace.files(files, { model, secrets: env.secrets })
return files
} finally {
Expand Down
7 changes: 5 additions & 2 deletions packages/core/src/types/prompt_template.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2122,10 +2122,13 @@ interface FuzzSearchOptions {

interface Retrieval {
/**
* Executers a Bing web search. Requires to configure the BING_SEARCH_API_KEY secret.
* Executes a web search with Tavily or Bing Search.
* @param query
*/
webSearch(query: string): Promise<WorkspaceFile[]>
webSearch(
query: string,
options?: { count?: number; provider?: "tavily" | "bing" }
): Promise<WorkspaceFile[]>

/**
* Search using similarity distance on embeddings
Expand Down
Loading

0 comments on commit 6b66593

Please sign in to comment.