diff --git a/README.rst b/README.rst index 28476c9..00f0211 100644 --- a/README.rst +++ b/README.rst @@ -50,7 +50,7 @@ Download and install the package from PyPI: .. code:: python >>> from sprynger import Meta, OpenAccess, init - >>> init() + >>> init(api_key='your free api key from https://dev.springernature.com') >>> >>> # Get metadata of all chapters in the book with ISBN '978-3-031-63497-0' >>> book_metadata = Metadata(isbn='978-3-031-63497-0', nr_results=3) @@ -81,4 +81,20 @@ Download and install the package from PyPI: 'A quantum-like cognitive approach to modeling human biased selection behavior' 'Cognitive biases of the human mind significantly influence the human decision-making process ...' -.. documentation-end \ No newline at end of file +.. documentation-end + +📖 Documentation +----------------- + +For a comprehensive guide, see the documentation in `read the docs `_. + +⭐️ Give the package a star +--------------------------- + +If the package helped you, give it a star! + +⚠️ Disclaimer +-------------- + +This project is an independent API wrapper for the Springer Nature API. +It is not affiliated with, endorsed by, or maintained by Springer Nature. For official support, please refer to Springer Nature's `documentation `_ and support channels. \ No newline at end of file diff --git a/docs/source/initialization.rst b/docs/source/initialization.rst index 3d5de89..3243aef 100644 --- a/docs/source/initialization.rst +++ b/docs/source/initialization.rst @@ -7,28 +7,30 @@ You initalize `sprynger` as follows: >>> import sprynger - >>> sprynger.init() + >>> sprynger.init(api_key='your key') -This reads the configuration from the default locations. You may also specify a different configuration file and API keys: +This reads your API key and uses the default configuration. You can also define `API_KEY` as an environment +variable. To use a custom configuration specify `config_file` in `init()`. -.. 
function:: init(config_dir: Union[str, Path], keys: Optional[List[str]]) +.. function:: init(api_key: Optional[str] = None, config_file: Optional[Union[str, Path]] = None) -> None - Function to initialize the sprynger library. For more information, see the - `documentation `_. + Function to initialize the sprynger library. For more information go to the + `documentation `_. - :param config_dir: Path to the configuration file (default is `~/.config/sprynger/sprynger.cfg`). - :type config_dir: str - :param keys: List of API keys (default is None). - :type keys: list, optional + :param api_key: API key + :type api_key: str, optional + :param config_file: Path to the configuration .toml file. + :type config_file: str or Path, optional - :raises FileNotFoundError: If the configuration file is not found. + :raises ValueError: If no API key was provided either as an argument or as an + environment variable `API_KEY`. - **Example**: + Example: - .. code:: python + .. code:: python - >>> from sprynger import init - >>> init(config_dir='path/to/custom/config.cfg', keys=['key1', 'key2']) + from sprynger import init + init(api_key='your key', config_file='path/to/custom/config.toml') In case you don't have a configuration file just enter your API key when prompted. @@ -37,13 +39,7 @@ In case you don't have a configuration file just enter your API key when prompte Configuration ============= -`sprynger` stores values it needs for operation in a configuration file called `sprynger.cfg`. -The config file saves credentials as well as directory names for folders that store downloaded results. -`sprynger` reads this file on startup. - -You can find the configuration file in: `~/.config/sprynger/sprynger.cfg` - -By default, after initial set-up (see below), the file will look like this: +The configuration file has to be a TOML file with the following structure. If any information is missing the default will be used. .. 
code-block:: cfg @@ -52,9 +48,6 @@ By default, after initial set-up (see below), the file will look like this: Meta = /Users/user/.cache/sprynger/meta OpenAccess = /Users/user/.cache/sprynger/open_access - [Authentication] - APIKey = XXX - [Requests] Timeout = 20 Retries = 5 @@ -63,8 +56,4 @@ By default, after initial set-up (see below), the file will look like this: Section `[Directories]` contains the paths where `sprynger` should store (cache) downloaded files. `sprynger` will create them if necessary. -Section `[Authentication]` contains the API Keys which you obtain from https://dev.springernature.com. - Section `[Requests]` contains the default values for the requests library. - -Simply edit this file using a simple text editor; changes will take effect the next time you start `sprynger`. Remember to indent multi-line statements. diff --git a/pyproject.toml b/pyproject.toml index f9d0740..a1f13d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,14 +4,14 @@ build-backend = "setuptools.build_meta" [project] name = "sprynger" -version = "0.2.0" +version = "0.3.0" authors = [ { name="Nils Herrmann", email="nils_herrmann@outlook.de" }, ] description = "An API wrapper for Springer Nature" readme = "README.rst" requires-python = ">=3.9" -dependencies = ["lxml", "requests", "urllib3"] +dependencies = ["lxml", "requests", "urllib3", "platformdirs"] keywords = ["API", "Springer", "wrapper", "requests", "lxml"] license = { text = "MIT" } classifiers = [ diff --git a/sprynger/base.py b/sprynger/base.py index fb728bb..26c3da0 100644 --- a/sprynger/base.py +++ b/sprynger/base.py @@ -13,7 +13,8 @@ from sprynger.utils.constants import BASE_URL, FORMAT, LIMIT, ONLINE_API from sprynger.utils.fetch import fetch_data -from sprynger.utils.startup import get_config, get_keys +from sprynger.utils.parse import chained_get +from sprynger.utils.startup import get_config, get_key class Base: """Base class to retrieve data from the Springer API.""" @@ -43,15 +44,15 @@ def 
__init__(self, rate_limit = LIMIT['Premium'][api] if premium else LIMIT['Basic'][api] limit = min(nr_results, rate_limit) - keys = get_keys() + key = get_key() self._url = f'{BASE_URL}/{online_api}/{FORMAT[api]}' self._params = {'q': query, 's': start, 'p': limit, - 'api_key': keys[0]} + 'api_key': key} # Generate a cache key based on the parameters - cache_dir = config.get('Directories', api) + cache_dir = chained_get(config, ['Directories', api]) self._cache_key = self._create_cache_key(query, start, limit) self._cache_file = os.path.join(cache_dir, f'{self._cache_key}.{FORMAT[api]}') self._refresh = refresh diff --git a/sprynger/exceptions.py b/sprynger/exceptions.py index e2d0245..a586ed5 100644 --- a/sprynger/exceptions.py +++ b/sprynger/exceptions.py @@ -7,6 +7,11 @@ def __init__(self, status_code, message="An error occurred with the API request" self.message = message super().__init__(f"{status_code}: {message}") +class MissingAPIKeyError(Exception): + """Exception raised when an API key is missing.""" + def __init__(self, message="API key is missing. 
Please provide a valid API key."): + self.message = message + super().__init__(self.message) class AuthenticationError(APIError): """Exception raised for 401/403 Authentication Failures""" diff --git a/sprynger/openaccess_article.py b/sprynger/openaccess_article.py index 28ab2ef..5189f35 100644 --- a/sprynger/openaccess_article.py +++ b/sprynger/openaccess_article.py @@ -1,18 +1,25 @@ """Module with the Article class for the OpenAccess class.""" from typing import Optional -from sprynger.utils.data_structures import Affiliation, Contributor, Date, Paragraph +from sprynger.utils.data_structures import Affiliation, Contributor, Date, Paragraph, Reference from sprynger.utils.parse import get_attr, get_text from sprynger.utils.parse_openaccess import ( affs_to_dict, + get_acknowledgements, get_contributors, get_affiliations, get_date, get_paragraphs, + get_reference_list ) class Article: """Auxiliary class to parse an article from a journal.""" + @property + def acknowledgements(self) -> Optional[str]: + """Acknowledgements of the article.""" + return get_acknowledgements(self._article_back) + @property def affiliations(self) -> list[Affiliation]: """List of affiliations of the collaborators of the article. Each affiliation is represented @@ -150,6 +157,17 @@ def publisher_loc(self) -> Optional[str]: def publisher_name(self) -> Optional[str]: """Name of the publisher.""" return get_text(self._journal_meta, './/publisher-name') + + @property + def references(self) -> list[Reference]: + """References of the article. + + Returns: + list[Reference]: A list of Reference objects containing the + `ref_list_id`, `ref_list_title`, `ref_id`, `ref_label`, `ref_publication_type`, + `authors`, `editors`, `names`, `ref_title`, `ref_source`, `ref_year`, `ref_doi`. 
+ """ + return get_reference_list(self._article_back) @property def title(self) -> Optional[str]: @@ -160,6 +178,7 @@ def __init__(self, data): self._data = data self._journal_meta = data.find('.//front/journal-meta') self._article_meta = data.find('.//front/article-meta') + self._article_back = data.find('.//back') def __repr__(self) -> str: return f'Article {self.doi}' diff --git a/sprynger/openaccess_chapter.py b/sprynger/openaccess_chapter.py index 80b4ca7..b644a4a 100644 --- a/sprynger/openaccess_chapter.py +++ b/sprynger/openaccess_chapter.py @@ -1,18 +1,25 @@ """Module with the chapter class for the OpenAccess class.""" from typing import Optional, Union -from sprynger.utils.data_structures import Affiliation, Contributor, Date, Paragraph +from sprynger.utils.data_structures import Affiliation, Contributor, Date, Paragraph, Reference from sprynger.utils.parse import get_attr, get_text, make_int_if_possible from sprynger.utils.parse_openaccess import ( affs_to_dict, + get_acknowledgements, get_contributors, get_affiliations, get_date, get_paragraphs, + get_reference_list ) class Chapter: """Auxiliary class to parse a chapter from a book.""" + @property + def acknowledgements(self) -> Optional[str]: + """Acknowledgements of the chapter.""" + return get_acknowledgements(self._chapter_back) + @property def affiliations(self) -> list[Affiliation]: """List of affiliations of the collaborators of the chapter. Each affiliation is represented @@ -136,6 +143,17 @@ def publisher_name(self) -> Optional[str]: """Name of the publisher.""" return get_text(self._book_meta, './/publisher/publisher-name') + @property + def references(self) -> list[Reference]: + """References of the chapter. + + Returns: + list[Reference]: A list of Reference objects containing the + `ref_list_id`, `ref_list_title`, `ref_id`, `ref_label`, `ref_publication_type`, + `authors`, `editors`, `names`, `ref_title`, `ref_source`, `ref_year`, `ref_doi`. 
+ """ + return get_reference_list(self._chapter_back) + @property def title(self) -> Optional[str]: """Title of the chapter.""" @@ -144,8 +162,9 @@ def title(self) -> Optional[str]: def __init__(self, data): self._data = data self._book_meta = data.find('.//book-meta') + self._chapter_back = data.find('.//back') self._chapter_meta = data.find('.//book-part[@book-part-type="chapter"]/book-part-meta') def __repr__(self) -> str: - return f'Chapter {self.doi}' \ No newline at end of file + return f'Chapter {self.doi}' diff --git a/sprynger/tests/test_config.toml b/sprynger/tests/test_config.toml new file mode 100644 index 0000000..255d64a --- /dev/null +++ b/sprynger/tests/test_config.toml @@ -0,0 +1,3 @@ +# This is a config used in test_startup +[Directories] +Metadata = "./sprynger/tests/" \ No newline at end of file diff --git a/sprynger/tests/test_exceptions.py b/sprynger/tests/test_exceptions.py index c57bdf0..ea2ffe4 100644 --- a/sprynger/tests/test_exceptions.py +++ b/sprynger/tests/test_exceptions.py @@ -12,6 +12,6 @@ def test_empty_response(): def test_authentication_error(): """Test the authentication error.""" - init(keys=['does_not_exist']) + init(api_key='does_not_exist') with pytest.raises(AuthenticationError, match='Authentication failed. 
Check your API key.'): Metadata('10.1007/s10660-023-09761-x', refresh=True) diff --git a/sprynger/tests/test_openaccess.py b/sprynger/tests/test_openaccess.py index 582b0ef..693256e 100644 --- a/sprynger/tests/test_openaccess.py +++ b/sprynger/tests/test_openaccess.py @@ -1,13 +1,14 @@ """Tests for the OpenAccess class.""" from sprynger import init, OpenAccess from sprynger.openaccess import Article, Chapter -from sprynger.utils.data_structures import Affiliation, Contributor, Date, Paragraph +from sprynger.utils.data_structures import Affiliation, Contributor, Date, Paragraph, Reference init() book = OpenAccess(isbn="978-3-031-63500-7", start=1, nr_results=2, refresh=30) chapter = OpenAccess("doi:10.1007/978-3-031-61874-1_5", refresh=30) chapter_with_text = OpenAccess(doi="10.1007/978-3-031-24498-8_7", refresh=30) +chapter_for_references = OpenAccess(doi='10.1007/978-3-031-63498-7_20') book_pagination = OpenAccess(isbn='978-3-031-63498-7', nr_results=30, refresh=30) journal = OpenAccess(issn="2198-6053", start=4, nr_results=3, refresh=30) @@ -59,6 +60,7 @@ def test_article_dates(): def test_article_meta(): """Test the article meta-data.""" for a in article: + assert a.acknowledgements == 'We would like to express our gratitude and thank all the stakeholders: Professor Guangzhong Liu, Zhanhui Hu, Siqing Zhuang (Shanghai Maritime University), Yanping Li (Jiangxi University of Technology).' 
assert a.article_type == 'research-article' assert a.language == 'en' assert a.publisher_id == 's40747-024-01577-y' @@ -66,6 +68,39 @@ def test_article_meta(): assert a.doi == '10.1007/s40747-024-01577-y' assert a.title == 'SAGB: self-attention with gate and BiGRU network for intrusion detection' +def test_article_references(): + """Test the references of the article.""" + expected_ref_last = Reference( + ref_list_id="Bib1", + ref_list_title="References", + ref_id="CR42", + ref_label="42.", + ref_publication_type="journal", + authors=["P Mishra", "V Varadharajan", "U Tupakula", "ES Pilli"], + editors=[], + names=[], + ref_title="A detailed investigation and analysis of using machine learning techniques for intrusion detection", + ref_source="IEEE Commun Surv Tutor", + ref_year="2018", + ref_doi="10.1109/comst.2018.2847722", + ) + + expected_ref_14 = Reference( + ref_list_id="Bib1", + ref_list_title="References", + ref_id="CR15", + ref_label="15.", + ref_publication_type="other", + authors=[], + editors=[], + names=[], + ref_title="Rashid A, Siddique MJ, Ahmed SM (2020) Machine and deep learning based comparative analysis using hybrid approaches for intrusion detection system", + ref_source=None, + ref_year=None, + ref_doi=None, + ) + assert article[0].references[-1] == expected_ref_last + assert article[0].references[14] == expected_ref_14 def test_book_meta(): """Test the book meta data.""" @@ -127,12 +162,49 @@ def test_chapter_dates(): def test_chapter_meta(): """Test the chapter meta data.""" for one_chapter in chapter: + assert one_chapter.acknowledgements is None assert one_chapter.doi == '10.1007/978-3-031-61874-1_5' assert one_chapter.chapter_nr == 5 assert one_chapter.title == 'Tools and Applications' assert isinstance(book[0], Chapter) +def test_chapter_references(): + """Test the references of the chapter.""" + expected_ref_1 = Reference( + ref_list_id='Bib1', + ref_list_title='References', + ref_id='CR1', + ref_label='1.', + 
ref_publication_type='other', + authors=[], + editors=[], + names=[], + ref_title='Beyersdorff, O., Hinde, L., Pich, J.: Reasons for hardness in QBF proof systems. ACM Trans. Comput. Theory 12(2), 10:1–10:27 (2020). https://doi.org/10.1145/3378665', + ref_source=None, + ref_year=None, + ref_doi='10.1145/3378665', + ) + expected_ref_2 = Reference( + ref_list_id='Bib1', + ref_list_title='References', + ref_id='CR3', + ref_label='3.', + ref_publication_type='confproc', + authors=['A Biere', 'F Lonsing', 'M Seidl'], + editors=['N Bjørner', 'V Sofronie-Stokkermans'], + names=[], + ref_title='Blocked clause elimination for QBF', + ref_source='Automated Deduction – CADE-23', + ref_year='2011', + ref_doi='10.1007/978-3-642-22438-6_10', + ) + for one_chapter in chapter_for_references: + assert one_chapter.references[0] == expected_ref_1 + assert one_chapter.references[2] == expected_ref_2 + + assert chapter[0].references == [] + def test_iterable(): """Test the lengths""" diff --git a/sprynger/tests/test_startup.py b/sprynger/tests/test_startup.py index 4f21ed0..4f982af 100644 --- a/sprynger/tests/test_startup.py +++ b/sprynger/tests/test_startup.py @@ -1,28 +1,17 @@ """Test the startup of the sprynger package.""" -from pathlib import Path - from sprynger import init -from sprynger.utils.startup import CUSTOM_KEYS, get_config, get_keys - - -def test_empty_keys(): - """Test the empty keys.""" - assert CUSTOM_KEYS is None +from sprynger.utils.parse import chained_get +from sprynger.utils.startup import get_config, get_key +def test_custom_api(): + """Test use of custom config file""" + init(api_key='not existing key', + config_file='./sprynger/tests/test_config.toml') + config = get_config() + assert chained_get(config, ['Directories', 'Metadata']) == './sprynger/tests/' def test_init(): """Test the init function.""" init() assert get_config() is not None - assert get_keys() is not None - - -def test_create_config(): - """Test the create_config function.""" - config_dir = 
Path.home()/'.config'/'sprynger'/'test_sprynger.cfg' - # Dete file - config_dir.unlink(missing_ok=True) - assert not config_dir.exists() - # Create file - init(config_dir, ['test_key']) - assert config_dir.exists() + assert get_key() is not None diff --git a/sprynger/utils/constants.py b/sprynger/utils/constants.py index 0eb4a2b..37e6e95 100644 --- a/sprynger/utils/constants.py +++ b/sprynger/utils/constants.py @@ -1,11 +1,10 @@ '''Constants for the sprynger package.''' from pathlib import Path +from platformdirs import user_cache_dir BASE_URL = 'http://api.springernature.com' -CONFIG_FILE = Path.home()/'.config'/'sprynger'/'sprynger.cfg' - -BASE_PATH = Path.home()/'.cache'/'sprynger' +BASE_PATH = Path(user_cache_dir('sprynger')) DEFAULT_PATHS = { 'Metadata': BASE_PATH/'metadata', 'Meta': BASE_PATH/'meta', @@ -41,6 +40,12 @@ }, } +REQUESTS = { + 'Timeout': 20, + 'Retries': 5, + 'BackoffFactor': 2.0 +} + VALID_FIELDS = { "doi": { "api": ["Metadata", "OpenAccess", "Meta"], diff --git a/sprynger/utils/create_config.py b/sprynger/utils/create_config.py deleted file mode 100644 index 950b0d8..0000000 --- a/sprynger/utils/create_config.py +++ /dev/null @@ -1,55 +0,0 @@ -"""This module contains the function to create a configuration file.""" -import configparser -from pathlib import Path -from typing import Optional - -from sprynger.utils.constants import CONFIG_FILE - - -def create_config(config_dir: Optional[Path] = None, - keys: Optional[list[str]] = None - ): - """Initiates process to generate configuration file. - - :param keys: If you provide a list of keys, sprynger will skip the prompt. 
- """ - from sprynger.utils.constants import DEFAULT_PATHS - - if not config_dir: - config_dir = CONFIG_FILE - - config = configparser.ConfigParser() - config.optionxform = str - print(f"Creating config file at {config_dir} with default paths...") - - # Set directories - config.add_section('Directories') - for api, path in DEFAULT_PATHS.items(): - config.set('Directories', api, str(path)) - - # Set authentication - config.add_section('Authentication') - if keys: - if not isinstance(keys, list): - raise ValueError("Parameter `keys` must be a list.") - key = ", ".join(keys) - else: - prompt_key = "Please enter your API Key(s), obtained from "\ - "https://dev.springernature.com. Separate "\ - "multiple keys by comma:\n" - key = input(prompt_key) - config.set('Authentication', 'APIKey', key) - - # Set default values - config.add_section('Requests') - config.set('Requests', 'Timeout', '20') - config.set('Requests', 'Retries', '5') - config.set('Requests', 'BackoffFactor', '2.0') - - # Write out - config_dir.parent.mkdir(parents=True, exist_ok=True) - with open(config_dir, "w") as ouf: - config.write(ouf) - print(f"Configuration file successfully created at {config_dir}\n" - "For details see https://sprynger.rtfd.io/en/stable/initialization.html.") - return config diff --git a/sprynger/utils/data_structures.py b/sprynger/utils/data_structures.py index e2c666b..bd9a2d1 100644 --- a/sprynger/utils/data_structures.py +++ b/sprynger/utils/data_structures.py @@ -44,6 +44,13 @@ def create_namedtuple(name: str, fields: list, defaults=None): fields_date = ['year', 'month', 'day'] Date = create_namedtuple('Date', fields_date) +fields_oa_reference = ['ref_list_id', 'ref_list_title', 'ref_id', + 'ref_label', 'ref_publication_type', 'authors', + 'editors', 'names', + 'ref_title', 'ref_source', 'ref_year', 'ref_doi'] +Reference = create_namedtuple('Reference', fields_oa_reference) + + ############################# # Meta # ############################# diff --git 
def chained_get(data: dict, keys: list, default=None) -> Union[str, int, float]:
    """
    Retrieve a value from a nested dictionary using a list of keys.

    Args:
        data (dict): The dictionary to search.
        keys (list): A list of keys representing the path to the value.
        default: Value returned when the path cannot be followed to the end.

    Returns:
        The value at the specified path, or `default` if any key along the
        path (including the last one) is missing or an intermediate value is
        not a dictionary. An empty `keys` list returns `data` itself.
    """
    for key in keys:
        # A missing key must yield `default` as well; `dict.get` alone would
        # silently turn a missing *last* key into None and ignore `default`.
        if not isinstance(data, dict) or key not in data:
            return default
        data = data[key]
    return data
data.findall('.//contrib-group'): - contribution_group = contrib_group.get('content-type') - for a in contrib_group.findall('.//aff'): - institution = a.find('.//institution-wrap') - new_aff = Affiliation( - type=contribution_group, - ref_nr=a.get('id'), - ror=get_attr(institution, 'institution-id', 'institution-id-type', 'ROR'), - grid=get_attr(institution, 'institution-id', 'institution-id-type', 'GRID'), - isni=get_attr(institution, 'institution-id', 'institution-id-type', 'ISNI'), - division=get_attr(institution, 'institution', 'content-type', 'org-division'), - name=get_attr(institution, 'institution', 'content-type', 'org-name'), - city=get_attr(a, 'addr-line', 'content-type', 'city'), - country=get_text(a, './/country') - ) - affiliations.append(new_aff) + if data is not None: + for contrib_group in data.findall('.//contrib-group'): + contribution_group = contrib_group.get('content-type') + for a in contrib_group.findall('.//aff'): + institution = a.find('.//institution-wrap') + new_aff = Affiliation( + type=contribution_group, + ref_nr=a.get('id'), + ror=get_attr(institution, 'institution-id', 'institution-id-type', 'ROR'), + grid=get_attr(institution, 'institution-id', 'institution-id-type', 'GRID'), + isni=get_attr(institution, 'institution-id', 'institution-id-type', 'ISNI'), + division=get_attr(institution, 'institution', 'content-type', 'org-division'), + name=get_attr(institution, 'institution', 'content-type', 'org-name'), + city=get_attr(a, 'addr-line', 'content-type', 'city'), + country=get_text(a, './/country') + ) + affiliations.append(new_aff) return affiliations def get_contributors(data: _Element) -> list[Contributor]: """Parse the contributors of the document and matcg them with their affiliations.""" contributors = [] - for c in data.findall('.//contrib'): - # Get affiliation - affs_nr = [] - for aff_ref in c.findall('.//xref[@ref-type="aff"]'): - aff_nr = aff_ref.get('rid') - affs_nr.append(aff_nr) - - # Get contributor data - new_contrib = 
def _get_names(ref_node: _Element) -> Optional[list[str]]:
    """Parse the person names that are direct `name` children of a node.

    Args:
        ref_node (_Element): Node whose direct `name` children are read
            (a citation node or a `person-group` node).

    Returns:
        list[str]: One "<given-names> <surname>" string per `name` child, in
        document order. A missing or empty part is left out instead of being
        rendered as the text "None"; a `name` child with neither part is
        skipped.
    """
    names = []
    for person in ref_node.findall('name'):
        parts = (stringify_descendants(person.find('given-names')),
                 stringify_descendants(person.find('surname')))
        # stringify_descendants returns None for an absent tag; formatting
        # that into an f-string would produce names such as "None Mishra".
        name = ' '.join(part for part in parts if part)
        if name:
            names.append(name)
    return names
def _get_reference(reference: _Element) -> Optional[_Element]:
    """Get the citation child of a `ref` node.

    The citation can be stored under one of three tags; they are tried in a
    fixed order and the first one found is returned.

    Returns:
        The citation node, or None if the `ref` has none of the three tags.
    """
    for tag in ("mixed-citation", "element-citation", "citation"):
        citation = reference.find(tag)
        if citation is not None:
            return citation
    return None


def _parse_citation(citation: Optional[_Element]) -> dict:
    """Extract the citation-level fields of a `Reference` as keyword arguments."""
    fields = {
        'ref_publication_type': None,
        'authors': [],
        'editors': [],
        'names': [],
        'ref_title': None,
        'ref_source': None,
        'ref_year': None,
        'ref_doi': None,
    }
    if citation is None:
        return fields

    publication_type = citation.get('publication-type') or citation.get('citation-type')
    fields['ref_publication_type'] = publication_type
    # Citations without a publication type are kept, but not parsed further.
    if publication_type is None:
        return fields

    # Get the title from the article-title tag or the whole reference
    title = get_text(citation, "article-title")
    if title is None:
        title = stringify_descendants(citation)
    fields['ref_title'] = title

    # Get the authors from the name tag or the person-group tag
    if citation.find('name') is not None:
        fields['names'] = _get_names(citation)
    else:
        fields['authors'] = _get_names_from_group(citation, "author")
        fields['editors'] = _get_names_from_group(citation, "editor")

    fields['ref_source'] = get_text(citation, "source")
    fields['ref_year'] = get_text(citation, "year")
    fields['ref_doi'] = _get_doi(citation)
    return fields


def get_reference_list(back: _Element) -> list[Reference]:
    """Parse the references of the document.

    Args:
        back (_Element): The `back` node of the document, or None if the
            document has none.

    Returns:
        list[Reference]: One `Reference` per `ref` node carrying an `id`,
        taken from every `ref-list` node carrying an `id`, in document order.
        Empty if `back` is None.
    """
    references = []
    if back is None:
        return references

    for ref_list in back.findall('.//ref-list[@id]'):
        ref_list_id = ref_list.get('id')
        ref_list_title = get_text(ref_list, './/title')
        for ref in ref_list.findall('.//ref[@id]'):
            references.append(Reference(
                ref_list_id=ref_list_id,
                ref_list_title=ref_list_title,
                ref_id=ref.get('id'),
                ref_label=get_text(ref, 'label'),
                **_parse_citation(_get_reference(ref)),
            ))
    return references
"""Module to initialize the sprynger library."""
import os
from pathlib import Path
from typing import Optional, Union

from sprynger.exceptions import MissingAPIKeyError
from sprynger.utils.constants import DEFAULT_PATHS, REQUESTS

# Module-level state, set by init() and read through get_key()/get_config().
API_KEY = None
CONFIG = None


def init(api_key: Optional[str] = None,
         config_file: Optional[Union[str, Path]] = None) -> None:
    """
    Function to initialize the sprynger library.

    The API key is resolved first, so a failed call creates no folders and
    leaves the state of a previous successful call untouched.

    Args:
        api_key (str): API key. If omitted, the environment variable
            `API_KEY` is used.
        config_file (str): Path to the configuration .toml file. Its values
            override the defaults; anything it omits keeps its default.

    Raises:
        ValueError: If no api key was provided either as argument or as an
            environment variable `API_KEY`.

    Example:
        >>> from sprynger import init
        >>> init(api_key='your key', config_file='path/to/custom/config.toml')
    """
    global API_KEY
    global CONFIG

    key = api_key or os.environ.get("API_KEY")
    if not key:
        raise ValueError('No API key found. Provide an API key or set the '
                         'environment variable API_KEY. To get an API key '
                         'visit: https://dev.springernature.com/')

    config = _load_default_config()

    if config_file:
        # tomllib only exists on Python >= 3.11. Importing it here keeps the
        # package importable on older interpreters when no custom
        # configuration file is used.
        import tomllib
        with open(config_file, 'rb') as f:
            custom_config = tomllib.load(f)
        _merge_dicts(config, custom_config)

    _create_cache_folders(config)

    API_KEY = key
    CONFIG = config


def _load_default_config() -> dict:
    """Auxiliary function to load the default configuration.

    The sections are copies, so merging a custom configuration never alters
    the constants `DEFAULT_PATHS` and `REQUESTS`.
    """
    return {
        'Directories': dict(DEFAULT_PATHS),
        'Requests': dict(REQUESTS),
    }


def _merge_dicts(default, custom):
    """
    Recursively merge two dictionaries. The values from the custom dictionary
    will overwrite those from the default dictionary, which is modified in
    place.
    """
    for key, value in custom.items():
        # Only descend when both sides are dictionaries; otherwise the custom
        # value replaces the default one as a whole.
        if isinstance(value, dict) and isinstance(default.get(key), dict):
            _merge_dicts(default[key], value)
        else:
            default[key] = value


def _create_cache_folders(config: dict) -> None:
    """Auxiliary function to create the folders listed under `Directories`."""
    directories = config.get('Directories', {})
    for path in directories.values():
        Path(path).mkdir(parents=True, exist_ok=True)


def get_config() -> dict:
    """Function to get the configuration dictionary set by init().

    Raises:
        MissingAPIKeyError: If the library has not been initialized.
    """
    if not CONFIG:
        raise MissingAPIKeyError('Library not initialized. '
                                 'Please initialize sprynger with init().\n'
                                 'For more information visit: '
                                 'the documentation.')
    return CONFIG


def get_key() -> str:
    """Function to get the API key set by init().

    Raises:
        MissingAPIKeyError: If no API key is set, instead of returning None
            and letting a request go out without a key.
    """
    if not API_KEY:
        raise MissingAPIKeyError('API key is missing. '
                                 'Please initialize sprynger with init().')
    return API_KEY