Ferramenta de web scraping para buscas em múltiplos engines com suporte a proxy, cache LRU e screenshots.
- Busca Web: Pesquisas em múltiplos engines (Bing, DuckDuckGo) com failover automático
- Busca de Imagens: Extração de resultados de imagens do Bing
- Busca de Notícias: Coleta de notícias com metadados (título, fonte, data)
- Busca de Vídeos: Extração de vídeos do Bing com thumbnails
- Screenshots: Captura de páginas web via Puppeteer com múltiplas opções
- Cache LRU: Sistema de cache em memória com TTL configurável
- Rotação de Proxies: Suporte a múltiplos proxies com blacklist automática
🛠️ Tecnologias
- Node.js (ES Modules)
- Axios - Cliente HTTP
- Cheerio - Parsing HTML
- Lodash - Utilitários
- Puppeteer Core - Screenshots
# Clone o repositório
git clone https://github.com/Shadw-Developer/deepquery.git
cd deepquery
# Instale as dependências
npm install
# Instale o Chromium/Chrome (necessário para screenshots)
# Termux:
pkg install chromium
# Ubuntu/Debian:
sudo apt install chromium-browser
# macOS:
brew install chromium

💻 Uso
Busca Web
import { dataSearch } from "./src/index.mjs";
// Busca simples
const results = await dataSearch("openai");
console.log(results);
// Retorna: [{ title, link, description, favicon }, ...]
// Com opções
const results = await dataSearch("nodejs", {
lang: "en-US",
limit: 5,
cache: true,
ttl: 120000
});

Busca de Imagens
import { imageSearch } from "./src/index.mjs";
const images = await imageSearch("cats", { limit: 10 });
// Retorna: [{ image: "url", title: "..." }, ...]

Busca de Notícias
import { newsSearch } from "./src/index.mjs";
const news = await newsSearch("inteligência artificial");
// Retorna: [{ title, link, source, time }, ...]

Busca de Vídeos
import { videoSearch } from "./src/index.mjs";
const videos = await videoSearch("tutorial javascript");
// Retorna: [{ title, link, thumbnail }, ...]

📸 Screenshots
Screenshot Simples
import { screenshot } from "./src/index.mjs";
const result = await screenshot("https://example.com");
// Com opções
const result = await screenshot("https://example.com", {
outputPath: "./meu-screenshot.png",
width: 1920,
height: 1080,
fullPage: true,
waitUntil: "networkidle0",
timeout: 60000,
delay: 5000
});

Batch Screenshots
import { screenshotBatch } from "./src/index.mjs";
const urls = [
"https://example.com",
"https://github.com",
"https://stackoverflow.com"
];
const results = await screenshotBatch(urls, {
batchDelay: 2000,
fullPage: false,
width: 1280,
height: 720
});

Screenshot de Elemento
import { screenshotElement } from "./src/index.mjs";
const result = await screenshotElement(
"https://example.com",
"h1",
{ outputPath: "./titulo.png" }
);

🔗 Combinando Busca + Screenshot
Exemplo 1: Screenshot do Primeiro Resultado
import { dataSearch, screenshot } from "./src/index.mjs";
/**
 * Runs a web search and captures a full-page screenshot of the first hit.
 *
 * @param {string} query - Search terms forwarded to `dataSearch`.
 * @returns {Promise<{search: Array, screenshot: *}|undefined>} The search
 *   results plus whatever `screenshot` resolved to, or `undefined` when the
 *   search produced no results.
 */
async function searchAndCapture(query) {
  const hits = await dataSearch(query, { limit: 5 });

  // Nothing to capture: fall out with `undefined`, as callers expect.
  if (!(hits.length > 0)) {
    return undefined;
  }

  const topHit = hits[0];
  const capture = await screenshot(topHit.link, {
    // Timestamp keeps successive runs from overwriting each other.
    outputPath: `./result-${Date.now()}.png`,
    fullPage: true,
    delay: 3000,
  });

  return { search: hits, screenshot: capture };
}
await searchAndCapture("openai gpt-4");

Exemplo 2: Screenshots dos Top Resultados
import { dataSearch, screenshotBatch } from "./src/index.mjs";
/**
 * Searches the web and attaches a screenshot path to each of the top results.
 *
 * @param {string} query - Search terms forwarded to `dataSearch`.
 * @param {number} [limit=3] - Maximum number of results to fetch and capture.
 * @returns {Promise<Array<object>>} Each search result extended with a
 *   `screenshot` property: the `path` of the batch entry at the same index,
 *   or `null` when that entry is missing or has no path.
 */
async function captureTopResults(query, limit = 3) {
  const hits = await dataSearch(query, { limit });
  const links = hits.map((hit) => hit.link);

  const batchOptions = {
    batchDelay: 3000,
    fullPage: false,
    width: 1920,
    height: 1080,
  };
  const shots = await screenshotBatch(links, batchOptions);

  // Batch entries are matched to search results by position.
  return hits.map((hit, index) => {
    const shot = shots[index];
    return { ...hit, screenshot: shot?.path || null };
  });
}
const results = await captureTopResults("javascript tutorial", 3);
results.forEach(r => console.log(`${r.title} -> ${r.screenshot}`));

Exemplo 3: Monitoramento de Notícias
import { newsSearch, screenshot } from "./src/index.mjs";
/**
 * Fetches news for a topic and captures a screenshot of each article,
 * one at a time.
 *
 * A failed capture does not abort the run: the item is still returned,
 * carrying an `error` message instead of a `screenshot` path.
 *
 * @param {string} topic - Search terms forwarded to `newsSearch`.
 * @returns {Promise<Array<object>>} Each news item extended with either
 *   `screenshot` (the captured file path) or `error` (the failure message).
 */
async function monitorNews(topic) {
  const news = await newsSearch(topic, { limit: 5 });
  const results = [];

  for (const item of news) {
    try {
      // The source name comes from scraped content, so it may contain
      // whitespace or characters that are illegal in file names (a "/" would
      // be read as a directory separator). Collapse them to "-".
      const safeSource = String(item.source ?? "unknown").replace(
        /[\s/\\:*?"<>|]+/g,
        "-",
      );
      const shot = await screenshot(item.link, {
        outputPath: `./news-${safeSource}-${Date.now()}.png`,
        delay: 2000,
      });
      results.push({ ...item, screenshot: shot.path });
    } catch (err) {
      results.push({ ...item, error: err.message });
    }
  }

  return results;
}
const news = await monitorNews("inteligência artificial");

Exemplo 4: Catálogo de Imagens
import { imageSearch, screenshot } from "./src/index.mjs";
/**
 * Collects image results for a query and captures a full-page screenshot of
 * the corresponding Bing image-search page.
 *
 * @param {string} query - Search terms; used for the image search, the Bing
 *   URL, and (sanitized) the screenshot file name.
 * @returns {Promise<{query: string, images: Array, catalogScreenshot: string}>}
 *   The original query, the image results, and the screenshot path.
 */
async function createImageCatalog(query) {
  const images = await imageSearch(query, { limit: 10 });
  const searchUrl = `https://www.bing.com/images/search?q=${encodeURIComponent(query)}`;

  // Replace whitespace AND characters that are illegal in file names, so a
  // query such as "cats/dogs" cannot turn into a nested or invalid path.
  const fileSlug = query.replace(/[\s/\\:*?"<>|]+/g, "-");

  const catalogShot = await screenshot(searchUrl, {
    outputPath: `./catalog-${fileSlug}.png`,
    fullPage: true,
    delay: 5000,
  });

  return {
    query,
    images,
    catalogScreenshot: catalogShot.path,
  };
}
const catalog = await createImageCatalog("paisagens natureza");

Exemplo 5: Análise de Vídeos
import { videoSearch, screenshot } from "./src/index.mjs";
/**
 * Searches for videos and captures a screenshot of every result concurrently.
 *
 * A failed capture does not reject the whole batch: that video is returned
 * with an `error` message instead of a `screenshot` path.
 *
 * @param {string} query - Search terms forwarded to `videoSearch`.
 * @returns {Promise<Array<object>>} Each video extended with either
 *   `screenshot` (the captured file path) or `error` (the failure message).
 */
async function analyzeVideos(query) {
  const videos = await videoSearch(query, { limit: 5 });

  return Promise.all(
    videos.map(async (video, index) => {
      try {
        const shot = await screenshot(video.link, {
          // All callbacks start in the same tick, so `Date.now()` alone gives
          // every capture the same file name; the index keeps them distinct.
          outputPath: `./video-${Date.now()}-${index}.png`,
          delay: 4000,
        });
        return { ...video, screenshot: shot.path };
      } catch (err) {
        return { ...video, error: err.message };
      }
    }),
  );
}
const analysis = await analyzeVideos("documentário natureza");

Exemplo 6: Relatório Completo
import { dataSearch, imageSearch, newsSearch, screenshotBatch } from "./src/index.mjs";
import fs from "fs/promises";
/**
 * Builds a combined report (web, image and news results plus screenshots of
 * the web hits) and writes it as pretty-printed JSON to the working directory.
 *
 * @param {string} query - Search terms used for all three searches and
 *   (sanitized) for the report file name.
 * @returns {Promise<object>} The report object that was written to disk.
 */
async function generateReport(query) {
  // The three searches are independent, so run them in parallel.
  const [web, images, news] = await Promise.all([
    dataSearch(query, { limit: 3 }),
    imageSearch(query, { limit: 5 }),
    newsSearch(query, { limit: 3 }),
  ]);

  const screenshots = await screenshotBatch(
    web.map((r) => r.link),
    { batchDelay: 2000, fullPage: true },
  );

  const report = {
    query,
    generatedAt: new Date().toISOString(),
    // Screenshots are matched to web results by position.
    webResults: web.map((r, i) => ({ ...r, screenshot: screenshots[i]?.path })),
    images,
    news,
    summary: {
      totalWeb: web.length,
      totalImages: images.length,
      totalNews: news.length,
      // Guard against missing entries, consistent with `webResults` above.
      screenshotsOk: screenshots.filter((s) => s?.success).length,
    },
  };

  // Whitespace and characters that are illegal in file names are collapsed
  // to "-" so an arbitrary query cannot produce a nested or invalid path.
  const fileSlug = String(query).replace(/[\s/\\:*?"<>|]+/g, "-");
  await fs.writeFile(`./report-${fileSlug}.json`, JSON.stringify(report, null, 2));

  return report;
}
const report = await generateReport("tecnologia 2024");

⚙️ Configuração
Edite src/config.mjs:
export const CONFIG = {
engines: { list: ["bing", "duckduckgo"] },
http: { timeout: 8000, retries: 3, baseDelay: 500 },
cache: { defaultTTL: 60000, maxItems: 500 },
proxy: { blockTTL: 60000 },
search: { defaultLang: "pt-BR", defaultLimit: 10 },
screenshot: {
executablePath: "/caminho/do/chromium",
defaultWidth: 1280,
defaultHeight: 800,
defaultTimeout: 60000,
defaultDelay: 3000,
},
};

🧪 Testes
node src/tests/data-search.mjs
node src/tests/image-search.mjs
node src/tests/news-search.mjs
node src/tests/video-search.mjs
node src/tests/screenshot-simples.mjs
node src/tests/screenshot-batch.mjs
node src/tests/screenshot-element.mjs

📝 Licença
GPL-3.0
👤 Autor
Alisson (mrx_dev)