From adc3b64cf482f661d81fd0af704481cc17f98e90 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 23 Aug 2016 13:39:57 -0700 Subject: [PATCH] Splitting out wikipedia_url as own property in language. --- docs/language-usage.rst | 54 ++++++++++++++++++-------------- gcloud/language/entity.py | 5 +++ gcloud/language/test_document.py | 18 +++++------ gcloud/language/test_entity.py | 15 ++++++--- system_tests/language.py | 15 ++++----- 5 files changed, 61 insertions(+), 46 deletions(-) diff --git a/docs/language-usage.rst b/docs/language-usage.rst index e8d8212b10af6..c61076d6df2c4 100644 --- a/docs/language-usage.rst +++ b/docs/language-usage.rst @@ -171,25 +171,29 @@ metadata and other properties. >>> entities = document.analyze_entities() >>> for entity in entities: ... print('=' * 20) - ... print(' name: %s' % (entity.name,)) - ... print(' type: %s' % (entity.entity_type,)) - ... print('metadata: %s' % (entity.metadata,)) - ... print('salience: %s' % (entity.salience,)) + ... print(' name: %s' % (entity.name,)) + ... print(' type: %s' % (entity.entity_type,)) + ... print('wikipedia_url: %s' % (entity.wikipedia_url,)) + ... print(' metadata: %s' % (entity.metadata,)) + ... print(' salience: %s' % (entity.salience,)) ==================== - name: Michelangelo Caravaggio - type: PERSON - metadata: {'wikipedia_url': 'http://en.wikipedia.org/wiki/Caravaggio'} - salience: 0.7615959 + name: Michelangelo Caravaggio + type: PERSON + wikipedia_url: http://en.wikipedia.org/wiki/Caravaggio + metadata: {} + salience: 0.7615959 ==================== - name: Italian - type: LOCATION - metadata: {'wikipedia_url': 'http://en.wikipedia.org/wiki/Italy'} - salience: 0.19960518 + name: Italian + type: LOCATION + wikipedia_url: http://en.wikipedia.org/wiki/Italy + metadata: {} + salience: 0.19960518 ==================== - name: The Calling of Saint Matthew - type: EVENT - metadata: {'wikipedia_url': 'http://en.wikipedia.org/wiki/The_Calling_of_St_Matthew_(Caravaggio)'} - salience: 0.038798928 + name: The Calling of Saint Matthew + type: EVENT + wikipedia_url: http://en.wikipedia.org/wiki/The_Calling_of_St_Matthew_(Caravaggio) + metadata: {} + salience: 0.038798928 Analyze Sentiment ----------------- @@ -266,14 +270,16 @@ the response is :data:`None`. >>> # Entities present if include_entities=True >>> for entity in annotations.entities: ... print('=' * 20) - ... print(' name: %s' % (entity.name,)) - ... print(' type: %s' % (entity.entity_type,)) - ... print('metadata: %s' % (entity.metadata,)) - ... print('salience: %s' % (entity.salience,)) + ... print(' name: %s' % (entity.name,)) + ... print(' type: %s' % (entity.entity_type,)) + ... print('wikipedia_url: %s' % (entity.wikipedia_url,)) + ... print(' metadata: %s' % (entity.metadata,)) + ... print(' salience: %s' % (entity.salience,)) ==================== - name: Moon - type: LOCATION - metadata: {'wikipedia_url': 'http://en.wikipedia.org/wiki/Natural_satellite'} - salience: 0.11793101 + name: Moon + type: LOCATION + wikipedia_url: http://en.wikipedia.org/wiki/Natural_satellite + metadata: {} + salience: 0.11793101 .. _Features: https://cloud.google.com/natural-language/reference/rest/v1beta1/documents/annotateText#Features diff --git a/gcloud/language/entity.py b/gcloud/language/entity.py index c7254c06dc16a..0b1c26f92da4a 100644 --- a/gcloud/language/entity.py +++ b/gcloud/language/entity.py @@ -53,6 +53,10 @@ class Entity(object): an organization, or location. The API associates information, such as salience and mentions, with entities. + The only supported metadata (as of August 2016) is ``wikipedia_url``, + so this value will be removed from the passed in ``metadata`` + and put in its own property. + See: https://cloud.google.com/natural-language/reference/rest/v1beta1/Entity @@ -78,6 +82,7 @@ class Entity(object): def __init__(self, name, entity_type, metadata, salience, mentions): self.name = name self.entity_type = entity_type + self.wikipedia_url = metadata.pop('wikipedia_url', None) self.metadata = metadata self.salience = salience self.mentions = mentions diff --git a/gcloud/language/test_document.py b/gcloud/language/test_document.py index 66ee3d2190797..d144c50a2b6c3 100644 --- a/gcloud/language/test_document.py +++ b/gcloud/language/test_document.py @@ -102,12 +102,8 @@ def test_analyze_entities(self): name1 = 'R-O-C-K' name2 = 'USA' content = name1 + ' in the ' + name2 - metadata1 = { - 'wikipedia_url': 'http://en.wikipedia.org/wiki/Rock_music', - } - metadata2 = { - 'wikipedia_url': 'http://en.wikipedia.org/wiki/United_States', - } + wiki1 = 'http://en.wikipedia.org/wiki/Rock_music' + wiki2 = 'http://en.wikipedia.org/wiki/United_States' salience1 = 0.91391456 salience2 = 0.086085409 response = { @@ -115,7 +111,7 @@ def test_analyze_entities(self): { 'name': name1, 'type': EntityType.OTHER, - 'metadata': metadata1, + 'metadata': {'wikipedia_url': wiki1}, 'salience': salience1, 'mentions': [ { @@ -129,7 +125,7 @@ def test_analyze_entities(self): { 'name': name2, 'type': EntityType.LOCATION, - 'metadata': metadata2, + 'metadata': {'wikipedia_url': wiki2}, 'salience': salience2, 'mentions': [ { @@ -153,14 +149,16 @@ def test_analyze_entities(self): self.assertIsInstance(entity1, Entity) self.assertEqual(entity1.name, name1) self.assertEqual(entity1.entity_type, EntityType.OTHER) - self.assertEqual(entity1.metadata, metadata1) + self.assertEqual(entity1.wikipedia_url, wiki1) + self.assertEqual(entity1.metadata, {}) self.assertEqual(entity1.salience, salience1) self.assertEqual(entity1.mentions, [name1]) entity2 = entities[1] self.assertIsInstance(entity2, Entity) self.assertEqual(entity2.name, name2) self.assertEqual(entity2.entity_type, EntityType.LOCATION) - self.assertEqual(entity2.metadata, metadata2) + self.assertEqual(entity2.wikipedia_url, wiki2) + self.assertEqual(entity2.metadata, {}) self.assertEqual(entity2.salience, salience2) self.assertEqual(entity2.mentions, [name2]) diff --git a/gcloud/language/test_entity.py b/gcloud/language/test_entity.py index 7eab34a401012..34dde32c0ae1f 100644 --- a/gcloud/language/test_entity.py +++ b/gcloud/language/test_entity.py @@ -27,14 +27,18 @@ def _makeOne(self, *args, **kw): def test_constructor_defaults(self): name = 'Italian' entity_type = 'LOCATION' - metadata = {'wikipedia_url': 'http://en.wikipedia.org/wiki/Italy'} + wiki_url = 'http://en.wikipedia.org/wiki/Italy' + metadata = {'wikipedia_url': wiki_url} + base_metadata = {'foo': 'bar'} + metadata.update(base_metadata) salience = 0.19960518 mentions = ['Italian'] entity = self._makeOne(name, entity_type, metadata, salience, mentions) self.assertEqual(entity.name, name) self.assertEqual(entity.entity_type, entity_type) - self.assertEqual(entity.metadata, metadata) + self.assertEqual(entity.wikipedia_url, wiki_url) + self.assertEqual(entity.metadata, base_metadata) self.assertEqual(entity.salience, salience) self.assertEqual(entity.mentions, mentions) @@ -43,7 +47,7 @@ def test_from_api_repr(self): name = 'Italy' entity_type = 'LOCATION' salience = 0.223 - metadata = {'wikipedia_url': 'http://en.wikipedia.org/wiki/Italy'} + wiki_url = 'http://en.wikipedia.org/wiki/Italy' mention1 = 'Italy' mention2 = 'To Italy' mention3 = 'From Italy' @@ -51,7 +55,7 @@ def test_from_api_repr(self): 'name': name, 'type': entity_type, 'salience': salience, - 'metadata': metadata, + 'metadata': {'wikipedia_url': wiki_url}, 'mentions': [ {'text': {'content': mention1}}, {'text': {'content': mention2}}, @@ -62,5 +66,6 @@ def test_from_api_repr(self): self.assertEqual(entity.name, name) self.assertEqual(entity.entity_type, entity_type) self.assertEqual(entity.salience, salience) - self.assertEqual(entity.metadata, metadata) + self.assertEqual(entity.wikipedia_url, wiki_url) + self.assertEqual(entity.metadata, {}) self.assertEqual(entity.mentions, [mention1, mention2, mention3]) diff --git a/system_tests/language.py b/system_tests/language.py index 56c8e373b5267..c23afc8711dde 100644 --- a/system_tests/language.py +++ b/system_tests/language.py @@ -46,17 +46,17 @@ def test_analyze_entities(self): self.assertEqual(entity1.entity_type, EntityType.PERSON) self.assertTrue(0.7 < entity1.salience < 0.8) self.assertEqual(entity1.mentions, [entity1.name]) - self.assertEqual(entity1.metadata, { - 'wikipedia_url': 'http://en.wikipedia.org/wiki/Caravaggio', - }) + self.assertEqual(entity1.wikipedia_url, + 'http://en.wikipedia.org/wiki/Caravaggio') + self.assertEqual(entity1.metadata, {}) # Verify entity 2. self.assertEqual(entity2.name, 'Italian') self.assertEqual(entity2.entity_type, EntityType.LOCATION) self.assertTrue(0.15 < entity2.salience < 0.25) self.assertEqual(entity2.mentions, [entity2.name]) - self.assertEqual(entity2.metadata, { - 'wikipedia_url': 'http://en.wikipedia.org/wiki/Italy', - }) + self.assertEqual(entity2.wikipedia_url, + 'http://en.wikipedia.org/wiki/Italy') + self.assertEqual(entity2.metadata, {}) # Verify entity 3. self.assertEqual(entity3.name, 'The Calling of Saint Matthew') self.assertEqual(entity3.entity_type, EntityType.EVENT) @@ -64,4 +64,5 @@ def test_analyze_entities(self): self.assertEqual(entity3.mentions, [entity3.name]) wiki_url = ('http://en.wikipedia.org/wiki/' 'The_Calling_of_St_Matthew_(Caravaggio)') - self.assertEqual(entity3.metadata, {'wikipedia_url': wiki_url}) + self.assertEqual(entity3.wikipedia_url, wiki_url) + self.assertEqual(entity3.metadata, {})