15
15
import unittest
16
16
17
17
18
# Shared fixture values for the ``annotate_text`` tests below.
ANNOTATE_NAME = 'Moon'
# The sentence used as document content; mentions ANNOTATE_NAME once.
ANNOTATE_CONTENT = 'A cow jumped over the ' + ANNOTATE_NAME + '.'
# Expected sentiment fixture values.
ANNOTATE_POLARITY = 1
ANNOTATE_MAGNITUDE = 0.2
# Expected entity fixture values.
ANNOTATE_SALIENCE = 0.11793101
ANNOTATE_WIKI_URL = 'http://en.wikipedia.org/wiki/Natural_satellite'
24
+
25
+
26
+ def _make_token_json (name , part_of_speech , head , edge_label ):
27
+ token_dict = {
28
+ 'text' : {
29
+ 'content' : name ,
30
+ 'beginOffset' : - 1 ,
31
+ },
32
+ 'partOfSpeech' : {'tag' : part_of_speech },
33
+ 'dependencyEdge' : {
34
+ 'headTokenIndex' : head ,
35
+ 'label' : edge_label ,
36
+ },
37
+ 'lemma' : name ,
38
+ }
39
+ return token_dict
40
+
41
+
42
def _get_token_and_sentences(include_syntax):
    """Return ``(token_info, sentences)`` fixtures for annotate_text tests.

    When ``include_syntax`` is false both are empty lists, mirroring a
    response produced without syntax extraction.
    """
    from gcloud.language.token import PartOfSpeech

    if not include_syntax:
        return [], []

    # (content, part of speech, head token index, edge label) per token.
    token_info = [
        ('A', PartOfSpeech.DETERMINER, 1, 'DET'),
        ('cow', PartOfSpeech.NOUN, 2, 'NSUBJ'),
        ('jumped', PartOfSpeech.VERB, 2, 'ROOT'),
        ('over', PartOfSpeech.ADPOSITION, 2, 'PREP'),
        ('the', PartOfSpeech.DETERMINER, 5, 'DET'),
        (ANNOTATE_NAME, PartOfSpeech.NOUN, 3, 'POBJ'),
        ('.', PartOfSpeech.PUNCTUATION, 2, 'P'),
    ]
    sentences = [
        {
            'text': {
                'content': ANNOTATE_CONTENT,
                'beginOffset': -1,
            },
        },
    ]
    return token_info, sentences
68
+
69
+
70
def _get_entities(include_entities):
    """Return the entity list fixture for annotate_text tests.

    A single LOCATION entity (the moon) when ``include_entities`` is
    true, otherwise an empty list.
    """
    from gcloud.language.entity import EntityType

    if not include_entities:
        return []

    return [
        {
            'name': ANNOTATE_NAME,
            'type': EntityType.LOCATION,
            'metadata': {
                'wikipedia_url': ANNOTATE_WIKI_URL,
            },
            'salience': ANNOTATE_SALIENCE,
            'mentions': [
                {
                    'text': {
                        'content': ANNOTATE_NAME,
                        'beginOffset': -1
                    }
                }
            ]
        },
    ]
96
+
97
+
18
98
class TestDocument (unittest .TestCase ):
19
99
20
100
def _getTargetClass (self ):
@@ -95,8 +175,18 @@ def test__to_dict_with_no_content(self):
95
175
'type' : klass .PLAIN_TEXT ,
96
176
})
97
177
98
- def test_analyze_entities (self ):
178
+ def _verify_entity (self , entity , name , entity_type , wiki_url , salience ):
99
179
from gcloud .language .entity import Entity
180
+
181
+ self .assertIsInstance (entity , Entity )
182
+ self .assertEqual (entity .name , name )
183
+ self .assertEqual (entity .entity_type , entity_type )
184
+ self .assertEqual (entity .wikipedia_url , wiki_url )
185
+ self .assertEqual (entity .metadata , {})
186
+ self .assertEqual (entity .salience , salience )
187
+ self .assertEqual (entity .mentions , [name ])
188
+
189
+ def test_analyze_entities (self ):
100
190
from gcloud .language .entity import EntityType
101
191
102
192
name1 = 'R-O-C-K'
@@ -136,7 +226,7 @@ def test_analyze_entities(self):
136
226
],
137
227
},
138
228
],
139
- 'language' : 'en' ,
229
+ 'language' : 'en-US ' ,
140
230
}
141
231
connection = _Connection (response )
142
232
client = _Client (connection = connection )
@@ -145,31 +235,26 @@ def test_analyze_entities(self):
145
235
entities = document .analyze_entities ()
146
236
self .assertEqual (len (entities ), 2 )
147
237
entity1 = entities [0 ]
148
- self .assertIsInstance (entity1 , Entity )
149
- self .assertEqual (entity1 .name , name1 )
150
- self .assertEqual (entity1 .entity_type , EntityType .OTHER )
151
- self .assertEqual (entity1 .wikipedia_url , None )
152
- self .assertEqual (entity1 .metadata , {})
153
- self .assertEqual (entity1 .salience , salience1 )
154
- self .assertEqual (entity1 .mentions , [name1 ])
238
+ self ._verify_entity (entity1 , name1 , EntityType .OTHER ,
239
+ None , salience1 )
155
240
entity2 = entities [1 ]
156
- self .assertIsInstance (entity2 , Entity )
157
- self .assertEqual (entity2 .name , name2 )
158
- self .assertEqual (entity2 .entity_type , EntityType .LOCATION )
159
- self .assertEqual (entity2 .wikipedia_url , wiki2 )
160
- self .assertEqual (entity2 .metadata , {})
161
- self .assertEqual (entity2 .salience , salience2 )
162
- self .assertEqual (entity2 .mentions , [name2 ])
241
+ self ._verify_entity (entity2 , name2 , EntityType .LOCATION ,
242
+ wiki2 , salience2 )
163
243
164
244
# Verify the request.
165
245
self .assertEqual (len (connection ._requested ), 1 )
166
246
req = connection ._requested [0 ]
167
247
self .assertEqual (req ['path' ], 'analyzeEntities' )
168
248
self .assertEqual (req ['method' ], 'POST' )
169
249
170
- def test_analyze_sentiment (self ):
250
+ def _verify_sentiment (self , sentiment , polarity , magnitude ):
171
251
from gcloud .language .sentiment import Sentiment
172
252
253
+ self .assertIsInstance (sentiment , Sentiment )
254
+ self .assertEqual (sentiment .polarity , polarity )
255
+ self .assertEqual (sentiment .magnitude , magnitude )
256
+
257
+ def test_analyze_sentiment (self ):
173
258
content = 'All the pretty horses.'
174
259
polarity = 1
175
260
magnitude = 0.6
@@ -178,23 +263,117 @@ def test_analyze_sentiment(self):
178
263
'polarity' : polarity ,
179
264
'magnitude' : magnitude ,
180
265
},
181
- 'language' : 'en' ,
266
+ 'language' : 'en-US ' ,
182
267
}
183
268
connection = _Connection (response )
184
269
client = _Client (connection = connection )
185
270
document = self ._makeOne (client , content )
186
271
187
272
sentiment = document .analyze_sentiment ()
188
- self .assertIsInstance (sentiment , Sentiment )
189
- self .assertEqual (sentiment .polarity , polarity )
190
- self .assertEqual (sentiment .magnitude , magnitude )
273
+ self ._verify_sentiment (sentiment , polarity , magnitude )
191
274
192
275
# Verify the request.
193
276
self .assertEqual (len (connection ._requested ), 1 )
194
277
req = connection ._requested [0 ]
195
278
self .assertEqual (req ['path' ], 'analyzeSentiment' )
196
279
self .assertEqual (req ['method' ], 'POST' )
197
280
281
+ def _verify_sentences (self , include_syntax , annotations ):
282
+ from gcloud .language .token import Sentence
283
+
284
+ if include_syntax :
285
+ self .assertEqual (len (annotations .sentences ), 1 )
286
+ sentence = annotations .sentences [0 ]
287
+ self .assertIsInstance (sentence , Sentence )
288
+ self .assertEqual (sentence .content , ANNOTATE_CONTENT )
289
+ self .assertEqual (sentence .begin , - 1 )
290
+ else :
291
+ self .assertEqual (annotations .sentences , [])
292
+
293
+ def _verify_tokens (self , annotations , token_info ):
294
+ from gcloud .language .token import Token
295
+
296
+ self .assertEqual (len (annotations .tokens ), len (token_info ))
297
+ for token , info in zip (annotations .tokens , token_info ):
298
+ self .assertIsInstance (token , Token )
299
+ self .assertEqual (token .text_content , info [0 ])
300
+ self .assertEqual (token .text_begin , - 1 )
301
+ self .assertEqual (token .part_of_speech , info [1 ])
302
+ self .assertEqual (token .edge_index , info [2 ])
303
+ self .assertEqual (token .edge_label , info [3 ])
304
+ self .assertEqual (token .lemma , info [0 ])
305
+
306
+ def _annotate_text_helper (self , include_sentiment ,
307
+ include_entities , include_syntax ):
308
+ from gcloud .language .document import Annotations
309
+ from gcloud .language .entity import EntityType
310
+
311
+ token_info , sentences = _get_token_and_sentences (include_syntax )
312
+ entities = _get_entities (include_entities )
313
+ tokens = [_make_token_json (* info ) for info in token_info ]
314
+ response = {
315
+ 'sentences' : sentences ,
316
+ 'tokens' : tokens ,
317
+ 'entities' : entities ,
318
+ 'language' : 'en-US' ,
319
+ }
320
+ if include_sentiment :
321
+ response ['documentSentiment' ] = {
322
+ 'polarity' : ANNOTATE_POLARITY ,
323
+ 'magnitude' : ANNOTATE_MAGNITUDE ,
324
+ }
325
+
326
+ connection = _Connection (response )
327
+ client = _Client (connection = connection )
328
+ document = self ._makeOne (client , ANNOTATE_CONTENT )
329
+
330
+ annotations = document .annotate_text (
331
+ include_syntax = include_syntax , include_entities = include_entities ,
332
+ include_sentiment = include_sentiment )
333
+ self .assertIsInstance (annotations , Annotations )
334
+ # Sentences
335
+ self ._verify_sentences (include_syntax , annotations )
336
+ # Token
337
+ self ._verify_tokens (annotations , token_info )
338
+ # Sentiment
339
+ if include_sentiment :
340
+ self ._verify_sentiment (annotations .sentiment ,
341
+ ANNOTATE_POLARITY , ANNOTATE_MAGNITUDE )
342
+ else :
343
+ self .assertIsNone (annotations .sentiment )
344
+ # Entity
345
+ if include_entities :
346
+ self .assertEqual (len (annotations .entities ), 1 )
347
+ entity = annotations .entities [0 ]
348
+ self ._verify_entity (entity , ANNOTATE_NAME , EntityType .LOCATION ,
349
+ ANNOTATE_WIKI_URL , ANNOTATE_SALIENCE )
350
+ else :
351
+ self .assertEqual (annotations .entities , [])
352
+
353
+ # Verify the request.
354
+ self .assertEqual (len (connection ._requested ), 1 )
355
+ req = connection ._requested [0 ]
356
+ self .assertEqual (req ['path' ], 'annotateText' )
357
+ self .assertEqual (req ['method' ], 'POST' )
358
+ features = req ['data' ]['features' ]
359
+ self .assertEqual (features .get ('extractDocumentSentiment' , False ),
360
+ include_sentiment )
361
+ self .assertEqual (features .get ('extractEntities' , False ),
362
+ include_entities )
363
+ self .assertEqual (features .get ('extractSyntax' , False ), include_syntax )
364
+
365
+ def test_annotate_text (self ):
366
+ self ._annotate_text_helper (True , True , True )
367
+
368
+ def test_annotate_text_sentiment_only (self ):
369
+ self ._annotate_text_helper (True , False , False )
370
+
371
+ def test_annotate_text_entities_only (self ):
372
+ self ._annotate_text_helper (False , True , False )
373
+
374
+ def test_annotate_text_syntax_only (self ):
375
+ self ._annotate_text_helper (False , False , True )
376
+
198
377
199
378
class _Connection (object ):
200
379
0 commit comments