Skip to content

Commit 3a4ba8f

Browse files
akshay11298 (Akshay Avinash), janbuchar, vdusek
authored
feat: Added get_public_url method to KeyValueStore (#572)
### Description

- Implement get_public_url method in KeyValueStore

### Issues

<!-- If applicable, reference any related GitHub issues -->

- Closes: #514

### Testing

- Unit tests added

### Checklist

- [x] CI passed

---------

Co-authored-by: Akshay Avinash <akshay.avinash@sap.com>
Co-authored-by: Jan Buchar <Teyras@gmail.com>
Co-authored-by: Vlada Dusek <v.dusek96@gmail.com>
1 parent 9466ce6 commit 3a4ba8f

4 files changed

Lines changed: 66 additions & 0 deletions

File tree

src/crawlee/base_storage_client/_base_key_value_store_client.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -114,3 +114,17 @@ async def delete_record(self, key: str) -> None:
114114
Args:
115115
key: The key of the record which to delete
116116
"""
117+
118+
@abstractmethod
async def get_public_url(self, key: str) -> str:
    """Return the public URL of the record stored under the given key.

    Concrete storage clients implement this method; the abstract
    declaration only fixes the contract described below.

    Args:
        key: The key of the record whose public URL is requested.

    Returns:
        The public URL of the record identified by `key`.

    Raises:
        ValueError: If no record exists under `key`.
    """

src/crawlee/memory_storage_client/_key_value_store_client.py

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -287,6 +287,28 @@ async def delete_record(self, key: str) -> None:
287287
if self._memory_storage_client.persist_storage:
288288
await existing_store_by_id.delete_persisted_record(record)
289289

290+
@override
async def get_public_url(self, key: str) -> str:
    """Build a `file://` URL for the record stored under `key`.

    The URL is formed by joining the store's resource directory with the
    record's filename and prefixing the result with `file://`.

    Args:
        key: Key of the record for which the URL is required.

    Returns:
        A `file://` URL string for the record's file path.

    Raises:
        ValueError: If no record is found for `key`.
    """
    store = find_or_create_client_by_id_or_name_inner(
        resource_client_class=KeyValueStoreClient,
        memory_storage_client=self._memory_storage_client,
        id=self.id,
        name=self.name,
    )

    if store is None:
        # NOTE(review): presumably this helper raises and never returns - confirm against its definition.
        raise_on_non_existing_storage(StorageTypes.KEY_VALUE_STORE, self.id)

    found_record = await self._get_record_internal(key)

    if not found_record:
        raise ValueError(f'Record with key "{key}" was not found.')

    # The path is interpolated as-is; no percent-encoding is applied to it.
    record_path = os.path.join(
        store.resource_directory,
        self._filename_from_record(found_record),
    )
    return f'file://{record_path}'
311+
290312
async def persist_record(self, record: KeyValueStoreRecord) -> None:
291313
"""Persist the specified record to the key-value store."""
292314
store_directory = self.resource_directory

src/crawlee/storages/_key_value_store.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -157,3 +157,14 @@ async def set_value(
157157
return await self._resource_client.delete_record(key)
158158

159159
return await self._resource_client.set_record(key, value, content_type)
160+
161+
async def get_public_url(self, key: str) -> str:
    """Return the public URL of the record stored under `key`.

    The lookup is delegated entirely to the underlying resource client.

    Args:
        key: Key of the record for which the URL is required.

    Returns:
        The public URL reported by the resource client for `key`.

    Raises:
        ValueError: Propagated from the resource client, whose contract is
            to raise it when the key does not exist.
    """
    public_url = await self._resource_client.get_public_url(key)
    return public_url

tests/unit/storages/test_key_value_store.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
from __future__ import annotations
22

3+
import asyncio
34
from typing import AsyncGenerator
5+
from urllib.parse import urlparse
46

57
import pytest
68

@@ -100,3 +102,20 @@ async def test_static_get_set_value(key_value_store: KeyValueStore) -> None:
100102
await key_value_store.set_value('test-static', 'static')
101103
value = await key_value_store.get_value('test-static')
102104
assert value == 'static'
105+
106+
107+
async def test_get_public_url_raises_for_non_existing_key(key_value_store: KeyValueStore) -> None:
    """Requesting the public URL of a key that was never stored raises `ValueError`."""
    missing_key = 'i-do-not-exist'

    with pytest.raises(ValueError, match='was not found'):
        await key_value_store.get_public_url(missing_key)
110+
111+
112+
async def test_get_public_url(key_value_store: KeyValueStore) -> None:
    """The public URL of a stored record points at a readable file holding the stored value."""
    await key_value_store.set_value('test-static', 'static')
    parsed_url = urlparse(await key_value_store.get_public_url('test-static'))

    # Take the file location from the netloc when it is non-empty, otherwise from the path component.
    file_location = parsed_url.netloc or parsed_url.path

    with open(file_location) as record_file:  # noqa: ASYNC230
        stored_text = await asyncio.to_thread(record_file.read)
        assert stored_text == 'static'

0 commit comments

Comments
 (0)