Skip to content

Commit

Permalink
Merge pull request #230 from jacquerie/remove-get_nested
Browse files Browse the repository at this point in the history
global: replace get_nested util with get_value
  • Loading branch information
jacquerie authored Feb 27, 2018
2 parents 5f1212c + 5d12cbe commit a922bd5
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 49 deletions.
26 changes: 13 additions & 13 deletions hepcrawl/spiders/aps_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@

from scrapy import Request

from inspire_utils.record import get_value

from . import StatefulSpider
from ..items import HEPRecord
from ..loaders import HEPLoader
from ..parsers import JatsParser
from ..utils import (
get_licenses,
get_nested,
build_dict,
ParsedItem,
strict_kwargs,
Expand Down Expand Up @@ -87,7 +88,7 @@ def parse(self, response):
aps_response = json.loads(response.body_as_unicode())

for article in aps_response['data']:
doi = get_nested(article, 'identifiers', 'doi')
doi = get_value(article, 'identifiers.doi', default='')

if doi:
request = Request(url='{}/{}'.format(self.aps_base_url, doi),
Expand Down Expand Up @@ -124,12 +125,12 @@ def _parse_json_on_failure(self, failure):
record = HEPLoader(item=HEPRecord(), response=original_response)
article = failure.request.meta['json_article']

doi = get_nested(article, 'identifiers', 'doi')
doi = get_value(article, 'identifiers.doi', default='')
record.add_dois(dois_values=[doi])
record.add_value('page_nr', str(article.get('numPages', '')))

record.add_value('abstract', get_nested(article, 'abstract', 'value'))
record.add_value('title', get_nested(article, 'title', 'value'))
record.add_value('abstract', get_value(article, 'abstract.value', default=''))
record.add_value('title', get_value(article, 'title.value', default=''))
# record.add_value('subtitle', '')

authors, collaborations = self._get_authors_and_collab(article)
Expand All @@ -140,27 +141,26 @@ def _parse_json_on_failure(self, failure):
# record.add_value('classification_numbers', classification_numbers)

record.add_value('journal_title',
get_nested(article, 'journal', 'abbreviatedName'))
get_value(article, 'journal.abbreviatedName', default=''))
record.add_value('journal_issue',
get_nested(article, 'issue', 'number'))
get_value(article, 'issue.number', default=''))
record.add_value('journal_volume',
get_nested(article, 'volume', 'number'))
get_value(article, 'volume.number', default=''))
# record.add_value('journal_artid', )

published_date = article.get('date', '')
record.add_value('journal_year', int(published_date[:4]))
record.add_value('date_published', published_date)
record.add_value('copyright_holder',
get_nested(article, 'rights', 'copyrightHolders')[0][
'name'])
get_value(article, 'rights.copyrightHolders.name[0]', default=''))
record.add_value('copyright_year',
str(get_nested(article, 'rights', 'copyrightYear')))
str(get_value(article, 'rights.copyrightYear', default='')))
record.add_value('copyright_statement',
get_nested(article, 'rights', 'rightsStatement'))
get_value(article, 'rights.rightsStatement', default=''))
record.add_value('copyright_material', 'publication')

license = get_licenses(
license_url=get_nested(article, 'rights', 'licenses')[0]['url']
license_url=get_value(article, 'rights.licenses.url[0]', default='')
)
record.add_value('license', license)

Expand Down
13 changes: 0 additions & 13 deletions hepcrawl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,19 +230,6 @@ def get_temporary_file(prefix="tmp_",
return filepath


def get_nested(root, *keys):
    """Return the value found by following ``keys`` down through ``root``.

    Each key is looked up in the value produced by the previous key,
    starting from ``root``. With no keys, ``root`` itself is returned.

    Args:
        root: the container to descend into (typically nested dicts).
        *keys: the successive keys to follow.

    Returns:
        The nested value, or ``''`` when any key along the path is missing
        or an intermediate value cannot be searched/indexed with that key.
    """
    current = root
    for key in keys:
        try:
            if key not in current:
                return ''
            current = current[key]
        except TypeError:
            # The intermediate value is a leaf (e.g. an int, or a string
            # where ``in`` matched a substring but indexing by that key is
            # impossible): treat the path as absent instead of crashing.
            return ''
    return current


def build_dict(seq, key):
"""
Creates a dictionary from a list, using the specified key.
Expand Down
23 changes: 0 additions & 23 deletions tests/unit/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
get_first,
get_journal_and_section,
get_mime_type,
get_nested,
get_node,
has_numbers,
parse_domain,
Expand Down Expand Up @@ -54,20 +53,6 @@ def netrcfile():
)


@pytest.fixture
def nested_json():
    """Provide a small nested mapping used to exercise ``get_nested``."""
    # Built inside-out so each nesting level is named explicitly.
    innermost = {'c': 'example_c'}
    branch = {'b': 'example_b', 'b1': innermost}
    return {'a': branch, 'a1': 'example_a1'}


@pytest.fixture
def list_for_dict():
"""An example list to test the build_dict function."""
Expand Down Expand Up @@ -112,14 +97,6 @@ def test_ftp_connection_info(netrcfile):
assert info['ftp_password'] == 'test'


def test_get_nested(nested_json):
    """Check lookups at several depths and the '' result for a missing key."""
    # Each entry pairs a key path with the value expected at that path.
    expectations = (
        (('a1',), 'example_a1'),
        (('a', 'b'), 'example_b'),
        (('a', 'b1', 'c'), 'example_c'),
        (('a', 'b2'), ''),
    )
    for path, expected in expectations:
        assert get_nested(nested_json, *path) == expected


def test_build_dict(list_for_dict):
"""Test the list to dict function, based on a specific key."""
dict_from_list = build_dict(list_for_dict, 'id')
Expand Down

0 comments on commit a922bd5

Please sign in to comment.