We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 7d8ee36, commit 418c066 (Copy full SHA for 418c066)
hivemind_etl/website/website_etl.py
@@ -1,3 +1,4 @@
1
+import logging
2
from typing import Any
3
4
from hivemind_etl.website.crawlee_client import CrawleeClient
@@ -47,7 +48,13 @@ async def extract(
47
48
"""
49
if not urls:
50
raise ValueError("No URLs provided for crawling")
- extracted_data = await self.crawlee_client.crawl(urls)
51
+
52
+ extracted_data = []
53
+ for url in urls:
54
+ logging.info(f"Crawling {url} and its routes!")
55
+ extracted_data.extend(await self.crawlee_client.crawl(links=[url]))
56
57
+ logging.info(f"Extracted {len(extracted_data)} documents!")
58
59
if not extracted_data:
60
raise ValueError(f"No data extracted from URLs: {urls}")
0 commit comments