Skip to content

Commit 65213a1

Browse files
Do not include a default language. (#2985)
The language API auto-detects the language if one is not provided, so defaulting to English is incorrect.
1 parent ec1776c commit 65213a1

File tree

2 files changed

+12
-13
lines changed

2 files changed

+12
-13
lines changed

language/google/cloud/language/document.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,6 @@
2525
from google.cloud.language.syntax import Token
2626

2727

28-
DEFAULT_LANGUAGE = 'en-US'
29-
"""Default document language, English."""
30-
31-
3228
Annotations = collections.namedtuple(
3329
'Annotations',
3430
'sentences tokens sentiment entities')
@@ -93,7 +89,7 @@ class Document(object):
9389
9490
:type language: str
9591
:param language: (Optional) The language of the document text.
96-
Defaults to :data:`DEFAULT_LANGUAGE`.
92+
Defaults to None (auto-detect).
9793
9894
:type encoding: str
9995
:param encoding: (Optional) The encoding of the document text.
@@ -115,7 +111,7 @@ class Document(object):
115111
"""HTML document type."""
116112

117113
def __init__(self, client, content=None, gcs_url=None, doc_type=PLAIN_TEXT,
118-
language=DEFAULT_LANGUAGE, encoding=Encoding.UTF8):
114+
language=None, encoding=Encoding.UTF8):
119115
if content is not None and gcs_url is not None:
120116
raise ValueError('A Document cannot contain both local text and '
121117
'a link to text in a Google Cloud Storage object')
@@ -139,8 +135,9 @@ def _to_dict(self):
139135
"""
140136
info = {
141137
'type': self.doc_type,
142-
'language': self.language,
143138
}
139+
if self.language is not None:
140+
info['language'] = self.language
144141
if self.content is not None:
145142
info['content'] = self.content
146143
elif self.gcs_url is not None:

language/unit_tests/test_document.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,8 @@ def test_constructor_defaults(self):
125125
self.assertIs(document.client, client)
126126
self.assertEqual(document.content, content)
127127
self.assertIsNone(document.gcs_url)
128+
self.assertIsNone(document.language)
128129
self.assertEqual(document.doc_type, MUT.Document.PLAIN_TEXT)
129-
self.assertEqual(document.language, MUT.DEFAULT_LANGUAGE)
130130
self.assertEqual(document.encoding, MUT.Encoding.UTF8)
131131

132132
def test_constructor_explicit(self):
@@ -146,6 +146,13 @@ def test_constructor_explicit(self):
146146
self.assertEqual(document.language, language)
147147
self.assertEqual(document.encoding, MUT.Encoding.UTF32)
148148

149+
def test_constructor_explicit_language(self):
150+
client = object()
151+
content = 'abc'
152+
document = self._make_one(client, content, language='en-US')
153+
self.assertEqual(document.language, 'en-US')
154+
self.assertEqual(document._to_dict()['language'], 'en-US')
155+
149156
def test_constructor_no_text(self):
150157
with self.assertRaises(ValueError):
151158
self._make_one(None, content=None, gcs_url=None)
@@ -162,7 +169,6 @@ def test__to_dict_with_content(self):
162169
info = document._to_dict()
163170
self.assertEqual(info, {
164171
'content': content,
165-
'language': document.language,
166172
'type': klass.PLAIN_TEXT,
167173
})
168174

@@ -173,7 +179,6 @@ def test__to_dict_with_gcs(self):
173179
info = document._to_dict()
174180
self.assertEqual(info, {
175181
'gcsContentUri': gcs_url,
176-
'language': document.language,
177182
'type': klass.PLAIN_TEXT,
178183
})
179184

@@ -183,7 +188,6 @@ def test__to_dict_with_no_content(self):
183188
document.content = None # Manually unset the content.
184189
info = document._to_dict()
185190
self.assertEqual(info, {
186-
'language': document.language,
187191
'type': klass.PLAIN_TEXT,
188192
})
189193

@@ -203,12 +207,10 @@ def _expected_data(content, encoding_type=None,
203207
extract_sentiment=False,
204208
extract_entities=False,
205209
extract_syntax=False):
206-
from google.cloud.language.document import DEFAULT_LANGUAGE
207210
from google.cloud.language.document import Document
208211

209212
expected = {
210213
'document': {
211-
'language': DEFAULT_LANGUAGE,
212214
'type': Document.PLAIN_TEXT,
213215
'content': content,
214216
},

0 commit comments

Comments
 (0)