File tree Expand file tree Collapse file tree 2 files changed +13
-4
lines changed
Expand file tree Collapse file tree 2 files changed +13
-4
lines changed Original file line number Diff line number Diff line change 11import asyncio
22from typing import Any
3+ import uuid
34
45from crawlee .playwright_crawler import PlaywrightCrawler , PlaywrightCrawlingContext
56from defusedxml import ElementTree as ET
@@ -21,6 +22,12 @@ def __init__(
2122 # do not persist crawled data to local storage
2223 self .crawler ._configuration .persist_storage = False
2324 self .crawler ._configuration .write_metadata = False
25+ self .crawler ._configuration .purge_on_start = True
26+
27+ # assign a fresh request queue id on every instantiation so the crawler
28+ # does not resume fetching links left over from a previous run
29+ config = self .crawler ._configuration .get_global_configuration ()
30+ config .default_request_queue_id = uuid .uuid4 ().hex
2431
2532 @self .crawler .router .default_handler
2633 async def request_handler (context : PlaywrightCrawlingContext ) -> None :
Original file line number Diff line number Diff line change @@ -30,15 +30,17 @@ async def test_extract(self):
3030 "title" : "Example" ,
3131 }
3232 ]
33-
33+
3434 # Mock the CrawleeClient class instead of the instance
35- with patch ('hivemind_etl.website.website_etl.CrawleeClient' ) as MockCrawleeClient :
35+ with patch (
36+ "hivemind_etl.website.website_etl.CrawleeClient"
37+ ) as MockCrawleeClient :
3638 mock_client_instance = AsyncMock ()
3739 mock_client_instance .crawl .return_value = mocked_data
3840 MockCrawleeClient .return_value = mock_client_instance
39-
41+
4042 extracted_data = await self .website_etl .extract (urls )
41-
43+
4244 self .assertEqual (extracted_data , mocked_data )
4345 MockCrawleeClient .assert_called_once ()
4446 mock_client_instance .crawl .assert_awaited_once_with (links = urls )
You can’t perform that action at this time.
0 commit comments