Skip to content

Commit

Permalink
Add B2Share support #88 (#89)
Browse files Browse the repository at this point in the history
Co-authored-by: Jonathan de Bruin <jonathandebruinos@gmail.com>
  • Loading branch information
micafer and J535D165 authored Sep 30, 2024
1 parent c0abca0 commit 66d71ab
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 1 deletion.
2 changes: 2 additions & 0 deletions datahugger/config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datahugger.services import ArXivDataset
from datahugger.services import B2shareDataset
from datahugger.services import DataDryadDataset
from datahugger.services import DataEuropaDataset
from datahugger.services import DataOneDataset
Expand Down Expand Up @@ -118,6 +119,7 @@
"trolling.uit.no": DataverseDataset,
"www.sodha.be": DataverseDataset,
"www.uni-hildesheim.de": DataverseDataset,
"b2share.eudat.eu": B2shareDataset,
"data.europa.eu": DataEuropaDataset,
}

Expand Down
22 changes: 21 additions & 1 deletion datahugger/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ class MendeleyDataset(DatasetDownloader):
class OSFDataset(DatasetDownloader):
"""Downloader for OSF repository."""

REGEXP_ID = r"osf\.io\/(?P<record_id>.*)/"
REGEXP_ID = r"osf\.io\/(?P<record_id>[^\/]*)\/{0,1}"

# the base entry point of the REST API
API_URL = "https://api.osf.io/v2/nodes/"
Expand Down Expand Up @@ -425,3 +425,23 @@ class SeaNoeDataset(DatasetDownloader):
ATTR_SIZE_JSONPATH = "size"
ATTR_HASH_JSONPATH = "checksum"
ATTR_HASH_TYPE_VALUE = "sha256"


class B2shareDataset(DatasetDownloader):
    """Downloader for datasets hosted on B2Share (b2share.eudat.eu).

    The class is purely declarative: it only sets the constants that the
    ``DatasetDownloader`` base class reads. How the base class consumes
    them is not visible here -- NOTE(review): confirm against the base
    class before relying on the per-attribute descriptions below.
    """

    # Captures ``record_id`` from a record URL: the run of digits and
    # lowercase letters that follows ``b2share.eudat.eu/records/``.
    REGEXP_ID = r"b2share\.eudat\.eu\/records\/(?P<record_id>[0-9a-z]+)"

    # Root of the B2Share REST API (note the trailing slash, which the
    # metadata template below depends on).
    API_URL = "https://b2share.eudat.eu/api/"

    # Template for the record metadata endpoint; ``api_url`` and
    # ``record_id`` are placeholders to be filled in by the caller.
    API_URL_META = "{api_url}records/{record_id}"
    # Selects every entry of the ``files`` array in the metadata document.
    META_FILES_JSONPATH = "files[*]"

    # Keys looked up on each file entry.
    ATTR_NAME_JSONPATH = "key"  # file name
    ATTR_FILE_LINK_JSONPATH = "ePIC_PID"  # field used as the file link
    ATTR_SIZE_JSONPATH = "size"  # file size
    ATTR_HASH_JSONPATH = "checksum"  # file checksum
    ATTR_HASH_TYPE_VALUE = "md5"  # hash algorithm declared for the checksum
4 changes: 4 additions & 0 deletions tests/test_repositories.toml
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ files = "AA_age.tab"
location = "https://github.com/j535d165/cbsodata"
files = "cbsodata-main/README.md"

[[b2share]]
location = "https://b2share.eudat.eu/records/db2ef5890fa44c7a85af366a50de73b9"
files = "2024-02-13.sav"

[[dataeuropa]]
location = "https://data.europa.eu/data/datasets/65e092e4009f18f050b14216"
files = "consolidation-wattzhub-schema-irve-statique-20240220-152202.csv"
Expand Down

0 comments on commit 66d71ab

Please sign in to comment.