Skip to content

Commit

Permalink
Determining page layout (#661)
Browse files Browse the repository at this point in the history
  • Loading branch information
hillary-mutisya authored Feb 4, 2025
1 parent 28c43a0 commit 7901207
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 21 deletions.
6 changes: 5 additions & 1 deletion ts/packages/agents/browser/src/agent/browserConnector.mts
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,17 @@ export class BrowserConnector {
return [];
}

async getFilteredHtmlFragments(inputHtmlFragments: any[]) {
async getFilteredHtmlFragments(
inputHtmlFragments: any[],
cssSelectorsToKeep: string[],
) {
let htmlFragments: any[] = [];
const timeoutPromise = new Promise((f) => setTimeout(f, 5000));
const filterAction = {
actionName: "getFilteredHTMLFragments",
parameters: {
fragments: inputHtmlFragments,
cssSelectorsToKeep: cssSelectorsToKeep,
},
};

Expand Down
21 changes: 21 additions & 0 deletions ts/packages/agents/browser/src/agent/discovery/actionHandler.mts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ export async function handleSchemaDiscoveryAction(
case "summarizePage":
await handleGetPageSummary(action);
break;
case "findPageComponents":
await handleGetPageComponents(action);
break;
}

async function handleFindUserActions(action: any) {
Expand Down Expand Up @@ -85,5 +88,23 @@ export async function handleSchemaDiscoveryAction(
return response.data;
}

/**
 * Handles the "findPageComponents" action.
 *
 * Fetches the current HTML fragments from the browser and asks the discovery
 * agent for the page layout (no user request is passed). On success the
 * enclosing handler's `message` is set to a pretty-printed JSON dump of the
 * layout and the layout data is returned. On failure the error is logged and
 * `undefined` is returned, leaving `message` untouched.
 *
 * @param action The incoming action object; not read by this handler.
 * @returns `response.data` from the agent on success, otherwise `undefined`.
 */
async function handleGetPageComponents(action: any) {
  const htmlFragments = await browser.getHtmlFragments();
  const timerName = `Getting page layout`;

  console.time(timerName);
  let response;
  try {
    response = await agent.getPageLayout(undefined, htmlFragments);
  } finally {
    // Always close the timer, including on failure or a thrown error.
    // Otherwise the label stays open and the next console.time() call with
    // the same label emits a "label already exists" warning.
    console.timeEnd(timerName);
  }

  if (!response.success) {
    console.error("Attempt to get page layout failed");
    console.error(response.message);
    return;
  }

  message = "Page layout: \n" + JSON.stringify(response.data, null, 2);

  return response.data;
}

return message;
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,10 @@ export type FindPageComponents = {

export type FindUserActions = {
actionName: "findUserActions";
parameters: {
allowDuplicates?: boolean;
};
};

export type SummarizePage = {
actionName: "summarizePage";
parameters: {
allowDuplicates?: boolean;
};
};

export type SaveUserActions = {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

/**
 * Result shape for page-layout discovery: four string fields, each named as a
 * CSS selector for one region of the page (header, footer, navigation links,
 * main content).
 *
 * NOTE(review): the discovery translator appears to read this file's text and
 * pass it along as the "PageLayout" schema, so comments added here may become
 * part of the schema text seen by the translator — confirm before expanding.
 */
export type PageLayout = {
headerCSSSelector: string;
footerCSSSelector: string;
navigationLinksCSSSelector: string;
mainContentCSSSelector: string;
};
86 changes: 76 additions & 10 deletions ts/packages/agents/browser/src/agent/discovery/translator.mts
Original file line number Diff line number Diff line change
Expand Up @@ -79,17 +79,18 @@ function getScreenshotPromptSection(
url: screenshot,
},
});
}
if (fragments) {
const textFragments = fragments.map((a) => a.text);
screenshotSection.push({
type: "text",
text: `Here is the text content of the page

if (fragments) {
const textFragments = fragments.map((a) => a.text);
screenshotSection.push({
type: "text",
text: `Here is the text content of the page
'''
${textFragments}
'''
`,
});
});
}
}
return screenshotSection;
}
Expand Down Expand Up @@ -176,7 +177,6 @@ export class SchemaDiscoveryAgent<T extends object> {
requestSection.push({
type: "text",
text: `
Here is user request
'''
${userRequest}
Expand Down Expand Up @@ -266,7 +266,6 @@ export class SchemaDiscoveryAgent<T extends object> {
requestSection.push({
type: "text",
text: `
Here is user request
'''
${userRequest}
Expand Down Expand Up @@ -366,7 +365,74 @@ export class SchemaDiscoveryAgent<T extends object> {
type: "text",
text: `
Examine the layout information provided and determine the content of the page and the actions users can take on it.
Once you have this list, a SINGLE "PageDescription" response using the typescript schema below.
Once you have this list, a SINGLE "${bootstrapTranslator.validator.getTypeName()}" response using the typescript schema below.
'''
${bootstrapTranslator.validator.getSchemaText()}
'''
`,
},
...requestSection,
{
type: "text",
text: `
The following is the COMPLETE JSON response object with 2 spaces of indentation and no properties with the value undefined:
`,
},
];

const response = await bootstrapTranslator.translate("", [
{ role: "user", content: JSON.stringify(promptSections) },
]);
return response;
}

async getPageLayout(
userRequest?: string,
fragments?: HtmlFragments[],
screenshot?: string,
) {
const packageRoot = path.join("..", "..", "..");
const resultsSchema = await fs.promises.readFile(
fileURLToPath(
new URL(
path.join(packageRoot, "./src/agent/discovery/schema/PageLayout.mts"),
import.meta.url,
),
),
"utf8",
);

const bootstrapTranslator = this.getBootstrapTranslator(
"PageLayout",
resultsSchema,
);

const screenshotSection = getScreenshotPromptSection(screenshot, fragments);
const htmlSection = getHtmlPromptSection(fragments);
const prefixSection = getBootstrapPrefixPromptSection();
let requestSection = [];
if (userRequest) {
requestSection.push({
type: "text",
text: `
Here is user request
'''
${userRequest}
'''
`,
});
}
const promptSections = [
...prefixSection,
...screenshotSection,
...htmlSection,
{
type: "text",
text: `
Examine the layout information provided and determine the content of the page and the actions users can take on it.
Once you have this list, a SINGLE "${bootstrapTranslator.validator.getTypeName()}" response using the typescript schema below.
'''
${bootstrapTranslator.validator.getSchemaText()}
Expand Down
5 changes: 5 additions & 0 deletions ts/packages/agents/browser/src/extension/htmlReducer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ export class HTMLReducer {
"nocontent",
"noscript",
"template",
"img",
];

mediaElementSelectors: string[] = [
Expand All @@ -68,6 +69,10 @@ export class HTMLReducer {
"clickid",
"fetchpriority",
"srcset",
"aria-busy",
"aria-haspopup",
"aria-autocomplete",
"href",
];

attribsToReplace: Set<string> = new Set(["href", "src"]);
Expand Down
7 changes: 3 additions & 4 deletions ts/packages/agents/browser/src/extension/serviceWorker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,7 @@ async function getTabHTMLFragmentsBySize(
async function getFilteredHTMLFragments(
targetTab: chrome.tabs.Tab,
inputHtmlFragments: any[],
cssSelectorsToKeep: string[],
) {
let htmlFragments: any[] = [];

Expand All @@ -740,10 +741,7 @@ async function getFilteredHTMLFragments(
{
type: "get_filtered_html_fragments",
inputHtml: inputHtmlFragments[i].content,
cssSelectors: [
inputHtmlFragments[i].cssSelectorAcross,
inputHtmlFragments[i].cssSelectorDown,
].join(", "),
cssSelectors: cssSelectorsToKeep.join(", "),
frameId: inputHtmlFragments[i].frameId,
},
{ frameId: inputHtmlFragments[i].frameId },
Expand Down Expand Up @@ -1165,6 +1163,7 @@ async function runBrowserAction(action: any) {
responseObject = await getFilteredHTMLFragments(
targetTab,
action.parameters.fragments,
action.parameters.cssSelectorsToKeep,
);
break;
}
Expand Down

0 comments on commit 7901207

Please sign in to comment.