Skip to content

Commit 2aa9fa2

Browse files
authored
Merge pull request #34 from OpenDataServices/portals
Portals
2 parents 2fc7dca + d891609 commit 2aa9fa2

File tree

7 files changed

+78
-18
lines changed

7 files changed

+78
-18
lines changed

Dockerfile.dev

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ RUN apt-get update \
55

66
WORKDIR /oc4ids_datastore_pipeline
77

8-
COPY requirements.txt .
8+
COPY requirements_dev.txt .
99

10-
RUN pip install -r requirements.txt
10+
RUN pip install -r requirements_dev.txt
1111

1212
COPY . .
1313

(filename missing from this capture — from the migration header below, this is the new Alembic migration file, likely `migrations/versions/cde761a59c2f_add_portals.py`)

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""add_portals
2+
3+
Revision ID: cde761a59c2f
4+
Revises: b21b5de6ee2d
5+
Create Date: 2025-07-08 07:51:48.954914
6+
7+
"""
8+
from typing import Sequence, Union
9+
10+
from alembic import op
11+
import sqlalchemy as sa
12+
13+
14+
# revision identifiers, used by Alembic.
15+
revision: str = 'cde761a59c2f'
16+
down_revision: Union[str, None] = 'b21b5de6ee2d'
17+
branch_labels: Union[str, Sequence[str], None] = None
18+
depends_on: Union[str, Sequence[str], None] = None
19+
20+
21+
def upgrade() -> None:
22+
# ### commands auto generated by Alembic - please adjust! ###
23+
op.add_column('dataset', sa.Column('portal_url', sa.String(), nullable=True))
24+
op.add_column('dataset', sa.Column('portal_title', sa.String(), nullable=True))
25+
# ### end Alembic commands ###
26+
27+
28+
def downgrade() -> None:
29+
# ### commands auto generated by Alembic - please adjust! ###
30+
op.drop_column('dataset', 'portal_title')
31+
op.drop_column('dataset', 'portal_url')
32+
# ### end Alembic commands ###

oc4ids_datastore_pipeline/database.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ class Dataset(Base):
3636
csv_url: Mapped[Optional[str]] = mapped_column(String, nullable=True)
3737
xlsx_url: Mapped[Optional[str]] = mapped_column(String, nullable=True)
3838
updated_at: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True))
39+
portal_url: Mapped[Optional[str]] = mapped_column(String, nullable=True)
40+
portal_title: Mapped[Optional[str]] = mapped_column(String, nullable=True)
3941

4042

4143
def get_engine() -> Engine:

oc4ids_datastore_pipeline/pipeline.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ def save_dataset_metadata(
111111
json_url: Optional[str],
112112
csv_url: Optional[str],
113113
xlsx_url: Optional[str],
114+
portal_title: Optional[str],
115+
portal_url: Optional[str],
114116
) -> None:
115117
logger.info(f"Saving metadata for dataset {dataset_id}")
116118
try:
@@ -127,6 +129,8 @@ def save_dataset_metadata(
127129
license_url=license_url,
128130
license_title=license_title,
129131
license_title_short=license_title_short,
132+
portal_title=portal_title,
133+
portal_url=portal_url,
130134
json_url=json_url,
131135
csv_url=csv_url,
132136
xlsx_url=xlsx_url,
@@ -157,6 +161,8 @@ def process_dataset(dataset_id: str, registry_metadata: dict[str, str]) -> None:
157161
json_url=json_public_url,
158162
csv_url=csv_public_url,
159163
xlsx_url=xlsx_public_url,
164+
portal_title=registry_metadata["portal_title"],
165+
portal_url=registry_metadata["portal_url"],
160166
)
161167
logger.info(f"Processed dataset {dataset_id}")
162168

oc4ids_datastore_pipeline/registry.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,17 @@ def fetch_registered_datasets() -> dict[str, dict[str, str]]:
1616
r = requests.get(url)
1717
r.raise_for_status()
1818
json_data = r.json()
19-
registered_datasets = {
20-
key: {
21-
"source_url": value["fields"]["url"]["value"],
22-
"country": value["fields"]["country"]["value"],
19+
registered_datasets = {}
20+
for key, value in json_data["records"].items():
21+
r_data = requests.get(value["api_url"])
22+
r_data.raise_for_status()
23+
r_data_json = r_data.json()
24+
registered_datasets[key] = {
25+
"source_url": r_data_json["fields"]["url"]["value"],
26+
"country": r_data_json["fields"]["country"]["value"],
27+
"portal_title": r_data_json["fields"]["portal_title"]["value"],
28+
"portal_url": r_data_json["fields"]["portal_url"]["value"],
2329
}
24-
for (key, value) in json_data["records"].items()
25-
}
2630
registered_datasets_count = len(registered_datasets)
2731
logger.info(f"Fetched URLs for {registered_datasets_count} datasets")
2832
except Exception as e:

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ build-backend = "flit_core.buildapi"
55
[project]
66
name = "oc4ids-datastore-pipeline"
77
description = "OC4IDS Datastore Pipeline"
8-
version = "0.5.0"
8+
version = "0.6.0"
99
readme = "README.md"
1010
dependencies = [
1111
"alembic",

tests/test_registry.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,23 +12,39 @@
1212

1313
def test_fetch_registered_datasets(mocker: MockerFixture) -> None:
1414
mock_response = MagicMock()
15-
mock_response.json.return_value = {
16-
"records": {
17-
"test_dataset": {
18-
"fields": {
19-
"url": {"value": "https://test_dataset.json"},
20-
"country": {"value": "ab"},
15+
mock_response.json.side_effect = [
16+
{
17+
"records": {
18+
"test_dataset": {
19+
"api_url": "http://www.example.com",
20+
"fields": {
21+
"url": {"value": "https://test_dataset.json"},
22+
"country": {"value": "ab"},
23+
},
2124
}
2225
}
23-
}
24-
}
26+
},
27+
{
28+
"fields": {
29+
"url": {"value": "https://test_dataset.json"},
30+
"country": {"value": "ab"},
31+
"portal_title": {"value": "Our Portal"},
32+
"portal_url": {"value": "https://our.portal"},
33+
}
34+
},
35+
]
2536
patch_get = mocker.patch("oc4ids_datastore_pipeline.pipeline.requests.get")
2637
patch_get.return_value = mock_response
2738

2839
result = fetch_registered_datasets()
2940

3041
assert result == {
31-
"test_dataset": {"source_url": "https://test_dataset.json", "country": "ab"}
42+
"test_dataset": {
43+
"source_url": "https://test_dataset.json",
44+
"country": "ab",
45+
"portal_title": "Our Portal",
46+
"portal_url": "https://our.portal",
47+
}
3248
}
3349

3450

0 commit comments

Comments
 (0)