Skip to content

Commit

Permalink
kate/tf 3913 application collect ecommerce pricing data (#81)
Browse files Browse the repository at this point in the history
* collect pricing data example

* update lint

* Update javascript-sdk/examples/collect-pricing-data/README.md

Co-authored-by: R L Nabors <rachelnabors@users.noreply.github.com>

---------

Co-authored-by: R L Nabors <rachelnabors@users.noreply.github.com>
  • Loading branch information
KateZhang98 and rachelnabors authored Oct 29, 2024
1 parent c9ea375 commit 8ee3b01
Show file tree
Hide file tree
Showing 2 changed files with 130 additions and 0 deletions.
17 changes: 17 additions & 0 deletions javascript-sdk/examples/collect-pricing-data/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Example script: collecting pricing data from an e-commerce website using AgentQL

This is an example of collecting pricing data from an e-commerce website using AgentQL.

## Run the script

- [Install AgentQL SDK](https://docs.agentql.com/javascript-sdk/installation)
- Save this JavaScript file locally as **collect_pricing_data.js**
- Run the following command from the project's folder:

```bash
node collect_pricing_data.js
```

## Play with the query

Install the [AgentQL Debugger Chrome extension](https://docs.agentql.com/installation/chrome-extension-installation) to play with the AgentQL query. [Learn more about the AgentQL query language](https://docs.agentql.com/agentql-query/query-intro).
113 changes: 113 additions & 0 deletions javascript-sdk/examples/collect-pricing-data/collect_pricing_data.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
const { wrap, configure } = require('agentql');
const { chromium } = require('playwright');
require('dotenv').config({ path: '.env.local' });

// Entry URL that the browser opens first.
// NOTE: inside this module the name `URL` refers to this string and shadows
// the global `URL` class; use `globalThis.URL` if the class is needed.
const URL = 'https://www.bestbuy.com';

/**
 * Extract pricing data from the page currently loaded in the browser.
 *
 * @param {object} page - Page object exposing an async `queryData(query)` method
 *   (the AgentQL-wrapped Playwright page created by the caller).
 * @returns {Promise<Array>} The `products` list from the query response, or an
 *   empty array when the response is missing or carries no product list.
 */
async function doExtractPricingData(page) {
  const QUERY = `
{
products[] {
name
model
sku
price
}
}`;
  const response = await page.queryData(QUERY);
  // The caller spreads the result and reads `.length`, so always hand back an
  // array even if the response is null or `products` has an unexpected shape.
  const products = response?.products;
  return Array.isArray(products) ? products : [];
}

/**
 * Type a product name into the site's search box, submit it, then fill in the
 * minimum and maximum price filters and submit those.
 *
 * Each field is looked up with `page.getByPrompt`; a falsy result is treated
 * as "field not found", which is logged and aborts the search.
 *
 * @param {object} page - Page object exposing an async `getByPrompt(prompt)` method.
 * @param {string} product - Search term to type into the search box.
 * @param {number|string} minPrice - Lower price bound; converted with `String()`.
 * @param {number|string} maxPrice - Upper price bound; converted with `String()`.
 * @returns {Promise<boolean>} `true` when every field was found and filled,
 *   `false` as soon as one of them is missing.
 */
async function searchProduct(page, product, minPrice, maxPrice) {
  // Look a field up by its description and report when it is absent.
  const locate = async (prompt, notFoundMessage) => {
    const field = await page.getByPrompt(prompt);
    if (!field) {
      console.log(notFoundMessage);
    }
    return field;
  };

  const searchBox = await locate('the search input field', 'Search input field not found.');
  if (!searchBox) {
    return false;
  }
  await searchBox.type(product, { delay: 200 });
  await searchBox.press('Enter');

  const lowerBound = await locate('the min price input field', 'Min price input field not found.');
  if (!lowerBound) {
    return false;
  }
  await lowerBound.fill(String(minPrice));

  const upperBound = await locate('the max price input field', 'Max price input field not found.');
  if (!upperBound) {
    return false;
  }
  await upperBound.fill(String(maxPrice));
  await upperBound.press('Enter');

  return true;
}

/**
 * Navigate to the next page of results, if the current page links to one.
 *
 * The next-page link is obtained by querying the page for its pagination
 * data. The link may be absolute or relative; it is resolved against the URL
 * of the page currently shown, the same way a browser resolves an `href`.
 *
 * @param {object} page - Page object exposing async `queryData(query)` and
 *   `goto(url)` plus a synchronous `url()` (Playwright page API).
 * @returns {Promise<boolean>} `true` after a successful navigation; `false`
 *   when no next-page URL was found or when resolving/navigating failed
 *   (the error is logged, not rethrown).
 */
async function goToTheNextPage(page) {
  const nextPageQuery = `
{
pagination {
prev_page_url
next_page_url
}
}`;
  console.log('Navigating to the next page...');
  const response = await page.queryData(nextPageQuery);
  const nextPageUrl = response?.pagination?.next_page_url;
  if (!nextPageUrl) {
    return false;
  }
  try {
    // Plain string concatenation breaks for links such as "results?page=2" or
    // "?page=2"; proper URL resolution handles absolute, root-relative and
    // path-relative links alike.
    // `globalThis.URL` is spelled out because this module declares its own
    // string constant named `URL`, which shadows the global class.
    const target = new globalThis.URL(nextPageUrl, page.url());
    await page.goto(target.href);
    return true;
  } catch (error) {
    console.error(error);
    return false;
  }
}

/**
 * Search for a product within a price range and collect pricing data from up
 * to `maxPages` pages of results.
 *
 * @param {object} page - Page object passed through to `searchProduct`,
 *   `doExtractPricingData` and `goToTheNextPage`.
 * @param {string} product - Search term.
 * @param {number|string} minPrice - Lower price bound for the filter.
 * @param {number|string} maxPrice - Upper price bound for the filter.
 * @param {number} [maxPages=3] - Maximum number of result pages to collect.
 * @returns {Promise<Array>} All product entries gathered across the visited
 *   pages; an empty array when the search itself could not be performed.
 */
async function extractPricingData(page, product, minPrice, maxPrice, maxPages = 3) {
  console.log(`Searching for product: ${product} with price range: $${minPrice} - $${maxPrice}`);
  if (!(await searchProduct(page, product, minPrice, maxPrice))) {
    console.log('Failed to search for the product.');
    return [];
  }

  const pricingData = [];
  for (let currentPage = 1; currentPage <= maxPages; currentPage += 1) {
    console.log(`Extracting pricing data on page ${currentPage}...`);
    const pricingDataOnPage = await doExtractPricingData(page);
    console.log(`${pricingDataOnPage.length} products found`);

    pricingData.push(...pricingDataOnPage);

    // The page limit is reached: stop here instead of running another
    // pagination query and navigation whose results would never be collected.
    if (currentPage + 1 > maxPages) {
      break;
    }

    if (!(await goToTheNextPage(page))) {
      console.log('No more next page.');
      break;
    }
  }

  return pricingData;
}
// Entry point: configure AgentQL from the AGENTQL_API_KEY environment
// variable, open a visible Chromium window on the start URL, collect pricing
// data for GPUs priced between $500 and $800, and print the result.
(async () => {
  configure({ apiKey: process.env.AGENTQL_API_KEY });
  const browser = await chromium.launch({ headless: false });
  try {
    const page = wrap(await browser.newPage());
    await page.goto(URL);

    const pricingData = await extractPricingData(page, 'gpu', 500, 800);
    console.log('Pricing data:', pricingData);
  } finally {
    // Always release the browser, even when navigation or extraction throws;
    // otherwise the launched browser process is left running.
    await browser.close();
  }
})().catch((error) => {
  // Report the failure explicitly and signal it through the exit code
  // instead of leaving the promise rejection unhandled.
  console.error(error);
  process.exitCode = 1;
});

0 comments on commit 8ee3b01

Please sign in to comment.