Skip to content

Commit eea3e11

Browse files
authored
Added get_sync_progress() method (#33)
Added get_sync_progress method
1 parent fde4080 commit eea3e11

File tree

2 files changed

+91
-0
lines changed

2 files changed

+91
-0
lines changed

src/amp/admin/datasets.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,3 +290,32 @@ def delete(self, namespace: str, name: str) -> None:
290290
"""
291291
path = f'/datasets/{namespace}/{name}'
292292
self._admin._request('DELETE', path)
293+
294+
def get_sync_progress(self, namespace: str, name: str, revision: str = 'latest') -> models.SyncProgressResponse:
295+
"""Get sync progress for a dataset version.
296+
297+
Returns per-table sync progress including current block numbers,
298+
job status, and file statistics. This is useful for monitoring
299+
the progress of data extraction jobs.
300+
301+
Args:
302+
namespace: Dataset namespace
303+
name: Dataset name
304+
revision: Version tag or semantic version (default: 'latest')
305+
306+
Returns:
307+
SyncProgressResponse with sync progress for all tables
308+
309+
Raises:
310+
DatasetNotFoundError: If dataset/version not found
311+
GetSyncProgressError: If retrieval fails
312+
313+
Example:
314+
>>> client = AdminClient('http://localhost:8080')
315+
>>> progress = client.datasets.get_sync_progress('_', 'eth_firehose', 'latest')
316+
>>> for table in progress.tables:
317+
... print(f'{table.table_name}: block {table.current_block}, status: {table.job_status}')
318+
"""
319+
path = f'/datasets/{namespace}/{name}/versions/{revision}/sync-progress'
320+
response = self._admin._request('GET', path)
321+
return models.SyncProgressResponse.model_validate(response.json())

src/amp/admin/models.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -777,6 +777,68 @@ class DeployRequest(BaseModel):
777777
"""
778778

779779

780+
class TableSyncProgress(BaseModel):
781+
"""
782+
Sync progress information for a single table
783+
"""
784+
785+
table_name: str
786+
"""
787+
Name of the table within the dataset
788+
"""
789+
current_block: Optional[int] = None
790+
"""
791+
Highest block number that has been synced (null if no data yet)
792+
"""
793+
start_block: Optional[int] = None
794+
"""
795+
Lowest block number that has been synced (null if no data yet)
796+
"""
797+
job_id: Optional[int] = None
798+
"""
799+
ID of the writer job (null if no active job)
800+
"""
801+
job_status: Optional[str] = None
802+
"""
803+
Status of the writer job (null if no active job)
804+
"""
805+
files_count: int
806+
"""
807+
Number of Parquet files written for this table
808+
"""
809+
total_size_bytes: int
810+
"""
811+
Total size of all Parquet files in bytes
812+
"""
813+
814+
815+
class SyncProgressResponse(BaseModel):
816+
"""
817+
API response containing sync progress information for a dataset
818+
"""
819+
820+
dataset_namespace: str
821+
"""
822+
Dataset namespace
823+
"""
824+
dataset_name: str
825+
"""
826+
Dataset name
827+
"""
828+
revision: str
829+
"""
830+
Requested revision
831+
"""
832+
manifest_hash: str
833+
"""
834+
Resolved manifest hash
835+
"""
836+
tables: list[TableSyncProgress]
837+
"""
838+
Sync progress for each table in the dataset
839+
"""
840+
841+
780842
class WorkerDetailResponse(BaseModel):
781843
"""
782844
Detailed worker information returned by the API

0 commit comments

Comments
 (0)