Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/develop' into feature/action-implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
yiwenlu66 committed Dec 22, 2024
2 parents 64897cf + 1e9c4bd commit 187cde9
Show file tree
Hide file tree
Showing 21 changed files with 437 additions and 296 deletions.
2 changes: 1 addition & 1 deletion src/core/eko.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ export default class Eko {
throw Error('Not implemented');
}

public registerTool(tool: Tool): void {
/**
 * Registers a tool with this Eko instance.
 *
 * Placeholder: registration is not implemented yet, so every call throws.
 *
 * @param tool - Tool to register.
 * @throws Error Always, with the message "Not implemented".
 */
public registerTool(tool: Tool<any, any>): void {
  throw new Error('Not implemented');
}

Expand Down
8 changes: 4 additions & 4 deletions src/core/tool-registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ import { ToolDefinition } from '../types/llm.types';
import { workflowSchema } from '../schemas/workflow.schema';

export class ToolRegistry {
private tools: Map<string, Tool> = new Map();
private tools: Map<string, Tool<any, any>> = new Map();

registerTool(tool: Tool): void {
registerTool(tool: Tool<any, any>): void {
if (this.tools.has(tool.name)) {
throw new Error(`Tool with name ${tool.name} already registered`);
}
Expand All @@ -21,7 +21,7 @@ export class ToolRegistry {
this.tools.delete(toolName);
}

getTool(toolName: string): Tool {
getTool(toolName: string): Tool<any, any> {
const tool = this.tools.get(toolName);
if (!tool) {
throw new Error(`Tool with name ${toolName} not found`);
Expand All @@ -33,7 +33,7 @@ export class ToolRegistry {
return toolNames.every(name => this.tools.has(name));
}

getAllTools(): Tool[] {
/**
 * Returns every tool currently held in the registry.
 *
 * @returns A new array built from the values of the internal `tools` map.
 */
getAllTools(): Tool<any, any>[] {
return Array.from(this.tools.values());
}

Expand Down
27 changes: 11 additions & 16 deletions src/extension/tools/computer.ts → src/extension/tools/browser.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { ScreenshotResult } from '../../types/tools.types';
import { getPageSize, sleep } from '../utils';

export async function key(tabId: number, key: string, coordinate?: [number, number]) {
export async function key(tabId: number, key: string, coordinate?: [number, number]): Promise<any> {
if (!coordinate) {
coordinate = (await cursor_position(tabId)).coordinate;
}
Expand Down Expand Up @@ -50,7 +51,7 @@ export async function key(tabId: number, key: string, coordinate?: [number, numb
return result;
}

export async function type(tabId: number, text: string, coordinate?: [number, number]) {
export async function type(tabId: number, text: string, coordinate?: [number, number]): Promise<any> {
if (!coordinate) {
coordinate = (await cursor_position(tabId)).coordinate;
}
Expand All @@ -62,7 +63,7 @@ export async function type(tabId: number, text: string, coordinate?: [number, nu
});
}

export async function clear_input(tabId: number, coordinate?: [number, number]) {
export async function clear_input(tabId: number, coordinate?: [number, number]): Promise<any> {
if (!coordinate) {
coordinate = (await cursor_position(tabId)).coordinate;
}
Expand All @@ -74,14 +75,14 @@ export async function clear_input(tabId: number, coordinate?: [number, number])
});
}

export async function mouse_move(tabId: number, coordinate: [number, number]) {
/**
 * Sends a `computer:mouse_move` message to the given tab.
 *
 * @param tabId - Id of the tab that receives the message.
 * @param coordinate - Target position, forwarded unchanged in the message.
 * @returns Whatever `chrome.tabs.sendMessage` resolves with for this message.
 */
export async function mouse_move(tabId: number, coordinate: [number, number]): Promise<any> {
  const message = { type: 'computer:mouse_move', coordinate };
  const response = await chrome.tabs.sendMessage(tabId, message);
  return response;
}

export async function left_click(tabId: number, coordinate?: [number, number]) {
export async function left_click(tabId: number, coordinate?: [number, number]): Promise<any> {
if (!coordinate) {
coordinate = (await cursor_position(tabId)).coordinate;
}
Expand All @@ -91,7 +92,7 @@ export async function left_click(tabId: number, coordinate?: [number, number]) {
});
}

export async function left_click_drag(tabId: number, coordinate: [number, number]) {
export async function left_click_drag(tabId: number, coordinate: [number, number]): Promise<any> {
let from_coordinate = (await cursor_position(tabId)).coordinate;
return await chrome.tabs.sendMessage(tabId, {
type: 'computer:left_click_drag',
Expand All @@ -100,7 +101,7 @@ export async function left_click_drag(tabId: number, coordinate: [number, number
});
}

export async function right_click(tabId: number, coordinate?: [number, number]) {
export async function right_click(tabId: number, coordinate?: [number, number]): Promise<any> {
if (!coordinate) {
coordinate = (await cursor_position(tabId)).coordinate;
}
Expand All @@ -110,7 +111,7 @@ export async function right_click(tabId: number, coordinate?: [number, number])
});
}

export async function double_click(tabId: number, coordinate?: [number, number]) {
export async function double_click(tabId: number, coordinate?: [number, number]): Promise<any> {
if (!coordinate) {
coordinate = (await cursor_position(tabId)).coordinate;
}
Expand All @@ -120,13 +121,7 @@ export async function double_click(tabId: number, coordinate?: [number, number])
});
}

export async function screenshot(windowId: number): Promise<{
image: {
type: 'base64';
media_type: 'image/png' | 'image/jpeg';
data: string;
};
}> {
export async function screenshot(windowId: number): Promise<ScreenshotResult> {
let dataUrl = await chrome.tabs.captureVisibleTab(windowId as number, {
format: 'jpeg', // jpeg / png
quality: 80, // 0-100
Expand All @@ -141,7 +136,7 @@ export async function screenshot(windowId: number): Promise<{
};
}

export async function scroll_to(tabId: number, coordinate: [number, number]) {
export async function scroll_to(tabId: number, coordinate: [number, number]): Promise<any> {
let from_coordinate = (await cursor_position(tabId)).coordinate;
return await chrome.tabs.sendMessage(tabId, {
type: 'computer:scroll_to',
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
import { ComputerUseParam, ComputerUseResult } from '../../types/tools.types';
import { Tool, InputSchema, ExecutionContext } from '../../types/action.types';
import { getWindowId, getTabId, sleep } from '../utils';
import * as computer from './computer';
import * as computer from './browser';

/**
* Computer Web for general
* Browser Use: general-purpose tool for operating the browser with mouse, keyboard and screenshots
*/
export class ComputerWeb implements Tool {
export class BrowserUse implements Tool<ComputerUseParam, ComputerUseResult> {
name: string;
description: string;
input_schema: InputSchema;

constructor() {
this.name = 'computer_web';
this.name = 'browser_use';
this.description = `Use a mouse and keyboard to interact with a computer, and take screenshots.
* This is a browser GUI interface where you do not have access to the address bar or bookmarks. You must operate the browser using inputs like screenshots, mouse, keyboard, etc.
* Some operations may take time to process, so you may need to wait and take successive screenshots to see the results of your actions. E.g. if you clicked submit button, but it didn't work, try taking another screenshot.
Expand Down Expand Up @@ -67,44 +68,43 @@ export class ComputerWeb implements Tool {
* @param {*} params { action: 'mouse_move', coordinate: [100, 200] }
* @returns > { success: true, coordinate?: [], image?: { type: 'base64', media_type: 'image/jpeg', data: '/9j...' } }
*/
async execute(context: ExecutionContext, params: unknown): Promise<unknown> {
if (typeof params !== 'object' || params === null || !('action' in params)) {
async execute(context: ExecutionContext, params: ComputerUseParam): Promise<ComputerUseResult> {
if (typeof params !== 'object' || params === null || !params.action) {
throw new Error('Invalid parameters. Expected an object with a "action" property.');
}
let { action, coordinate, text } = params as any;
let tabId = await getTabId(context);
let windowId = await getWindowId(context);
let result;
switch (action as string) {
switch (params.action) {
case 'key':
result = await computer.key(tabId, text, coordinate);
result = await computer.key(tabId, params.text as string, params.coordinate);
await sleep(500);
break;
case 'type':
result = await computer.type(tabId, text, coordinate);
result = await computer.type(tabId, params.text as string, params.coordinate);
await sleep(500);
break;
case 'clear_input':
result = await computer.clear_input(tabId, coordinate);
result = await computer.clear_input(tabId, params.coordinate);
await sleep(100);
break;
case 'mouse_move':
result = await computer.mouse_move(tabId, coordinate);
result = await computer.mouse_move(tabId, params.coordinate as [number, number]);
break;
case 'left_click':
result = await computer.left_click(tabId, coordinate);
result = await computer.left_click(tabId, params.coordinate);
await sleep(100);
break;
case 'left_click_drag':
result = await computer.left_click_drag(tabId, coordinate);
result = await computer.left_click_drag(tabId, params.coordinate as [number, number]);
await sleep(100);
break;
case 'right_click':
result = await computer.right_click(tabId, coordinate);
result = await computer.right_click(tabId, params.coordinate);
await sleep(100);
break;
case 'double_click':
result = await computer.double_click(tabId, coordinate);
result = await computer.double_click(tabId, params.coordinate);
await sleep(100);
break;
case 'screenshot':
Expand All @@ -115,12 +115,12 @@ export class ComputerWeb implements Tool {
result = await computer.cursor_position(tabId);
break;
case 'scroll_to':
result = await computer.scroll_to(tabId, coordinate);
result = await computer.scroll_to(tabId, params.coordinate as [number, number]);
await sleep(1000);
break;
default:
throw Error(
`Invalid parameters. The "${action}" value is not included in the "action" enumeration.`
`Invalid parameters. The "${params.action}" value is not included in the "action" enumeration.`
);
}
return { success: true, ...result };
Expand Down
61 changes: 42 additions & 19 deletions src/extension/tools/element_click.ts
Original file line number Diff line number Diff line change
@@ -1,39 +1,62 @@
import { Tool, InputSchema, ExecutionContext } from "../../types/action.types";
import { Tool, InputSchema, ExecutionContext } from '../../types/action.types';

/**
 * Tool that clicks a page element identified by its title.
 *
 * The click itself is not implemented yet: `execute` validates its input and
 * then always throws.
 */
export class ElementClick implements Tool<any, any> {
  name: string;
  description: string;
  input_schema: InputSchema;

  constructor() {
    this.name = 'element_click';
    this.description = 'click element';
    this.input_schema = {
      type: 'object',
      properties: {
        element: {
          type: 'string',
          description: 'Element title',
        },
      },
      required: ['element'],
    };
  }

  /**
   * Validates the parameters and (eventually) clicks the matching element.
   *
   * @param context - Execution context supplied by the caller; not used yet.
   * @param params - Expected to be an object with the `element` property
   *   declared in `input_schema`.
   * @throws Error When `params` is not an object carrying an `element` property.
   * @throws Error Always after validation, with the message "Not implemented".
   */
  async execute(context: ExecutionContext, params: any): Promise<any> {
    // Validate the property that `input_schema` actually declares as required
    // (`element`), so schema-conforming input is not rejected.
    if (typeof params !== 'object' || params === null || !('element' in params)) {
      throw new Error('Invalid parameters. Expected an object with an "element" property.');
    }
    // button, span, label, a, img, input, textarea, strlen < 30
    // TODO ....
    throw new Error('Not implemented');
  }
}

/**
 * Builds an XPath expression that locates `element` in the current document.
 *
 * - A node with a non-empty string `id` is addressed as `//*[@id="..."]`.
 * - `document.body` is addressed as `/html/body`.
 * - Any other node gets its parent's path followed by `/tag[n]`, where `n` is
 *   its 1-based position among sibling elements with the same tag name.
 * - A node without a parent contributes an empty prefix.
 *
 * @param element - DOM node to locate.
 * @returns The XPath string, or `undefined` when `element` is not listed among
 *   its parent's child nodes.
 */
function xpath(element: any): string | undefined {
  // Only a real, non-empty string id counts; nodes without an `id` property
  // (such as the document) must not produce `@id="undefined"`.
  if (typeof element.id === 'string' && element.id !== '') {
    return '//*[@id="' + element.id + '"]';
  }
  if (element == document.body) {
    return '/html/' + element.tagName.toLowerCase();
  }

  const parent = element.parentNode;
  if (!parent) {
    // Top of the tree: nothing to prefix.
    return '';
  }

  const siblings = parent.childNodes;
  let position = 1;
  for (let i = 0, count = siblings.length; i < count; i++) {
    const sibling = siblings[i];
    if (sibling == element) {
      // Plain named recursion: `arguments.callee` throws in strict-mode code,
      // and ES modules are always strict.
      const parentPath = xpath(parent);
      if (parentPath === undefined) {
        return undefined;
      }
      return parentPath + '/' + element.tagName.toLowerCase() + '[' + position + ']';
    }
    // Count only element nodes (nodeType 1) that share this tag name.
    if (sibling.nodeType == 1 && sibling.tagName == element.tagName) {
      position++;
    }
  }
  return undefined;
}
Loading

0 comments on commit 187cde9

Please sign in to comment.