Natural Language GAPIC client library #1018

Merged · 16 commits · Jul 28, 2017
migrate v1beta2 quickstart and snippets to gapic, and flake
dizcology committed Jul 20, 2017
commit e34ab1d0704b10edc5ff6f12b86e2202866f408e
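
In short, this commit swaps the handwritten `language.Client` surface for the generated GAPIC client. A minimal sketch of the before/after pattern, assembled from the diffs below (package paths as shipped with google-cloud-language at the time; treat this as a sketch, not the canonical sample):

```python
# Before (removed below): handwritten client, versioned via a flag.
#   from google.cloud import language
#   language_client = language.Client(api_version='v1beta2')
#   document = language_client.document_from_text(u'Hello, world!')

# After (added below): generated GAPIC client and plain request messages.
from google.cloud import language_v1beta2
from google.cloud.language_v1beta2 import types

client = language_v1beta2.LanguageServiceClient()
document = types.Document(content=u'Hello, world!', type='PLAIN_TEXT')
```

The same substitution repeats in every function touched below.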
6 changes: 4 additions & 2 deletions language/cloud-client/v1/snippets.py
@@ -125,7 +125,8 @@ def syntax_text(text):
     tokens = client.analyze_syntax(document).tokens

     for token in tokens:
-        print(u'{}: {}'.format(POS_TAG[token.part_of_speech.tag], token.text.content))
+        print(u'{}: {}'.format(POS_TAG[token.part_of_speech.tag],
+                               token.text.content))


 def syntax_file(gcs_uri):
@@ -140,7 +141,8 @@ def syntax_file(gcs_uri):
     tokens = client.analyze_syntax(document).tokens

     for token in tokens:
-        print(u'{}: {}'.format(POS_TAG[token.part_of_speech.tag], token.text.content))
+        print(u'{}: {}'.format(POS_TAG[token.part_of_speech.tag],
+                               token.text.content))


 if __name__ == '__main__':
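Context for the wrapped `POS_TAG` lines above: the GAPIC client returns `part_of_speech.tag` as an integer enum value rather than a string, so the samples index a tuple of labels to print something readable. A small illustration using the tuple order defined in these samples (the value 6 for NOUN follows from that order):

```python
# Tag labels in enum order, copied from the samples in this PR.
POS_TAG = ('UNKNOWN', 'ADJ', 'ADP', 'ADV', 'CONJ', 'DET', 'NOUN', 'NUM',
           'PRON', 'PRT', 'PUNCT', 'VERB', 'X', 'AFFIX')

tag = 6  # e.g. a token whose part_of_speech.tag is PartOfSpeech.Tag.NOUN
print(POS_TAG[tag])  # -> NOUN
```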
12 changes: 6 additions & 6 deletions language/cloud-client/v1beta2/quickstart.py
@@ -18,17 +18,17 @@
 def run_quickstart():
     # [START language_quickstart]
     # Imports the Google Cloud client library
-    from google.cloud import language
+    from google.cloud import language_v1beta2
+    from google.cloud.language_v1beta2 import types

     # Instantiates a client with the v1beta2 version
-    language_client = language.Client(api_version='v1beta2')
+    client = language_v1beta2.LanguageServiceClient()

     # The text to analyze
-    text = 'Hallo Welt!'
-    document = language_client.document_from_text(text, language='DE')
-
+    text = u'Hallo Welt!'
+    document = types.Document(content=text, type='PLAIN_TEXT', language='de')
     # Detects the sentiment of the text
-    sentiment = document.analyze_sentiment().sentiment
+    sentiment = client.analyze_sentiment(document).document_sentiment

     print('Text: {}'.format(text))
     print('Sentiment: {}, {}'.format(sentiment.score, sentiment.magnitude))
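Pulling the new quickstart flow out of the diff: the RPC moves from the document object to the client, and the document-level result moves from `.sentiment` to `.document_sentiment` (the language code also drops to lowercase 'de'). A minimal, self-contained sketch of the new shape:

```python
from google.cloud import language_v1beta2
from google.cloud.language_v1beta2 import types

client = language_v1beta2.LanguageServiceClient()
document = types.Document(content=u'Hallo Welt!', type='PLAIN_TEXT',
                          language='de')

# Old shape: sentiment = document.analyze_sentiment().sentiment
sentiment = client.analyze_sentiment(document).document_sentiment
print('Sentiment: {}, {}'.format(sentiment.score, sentiment.magnitude))
```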
98 changes: 49 additions & 49 deletions language/cloud-client/v1beta2/snippets.py
@@ -24,64 +24,66 @@
 import argparse
 import sys

-from google.cloud import language
-from google.cloud.gapic.language.v1beta2 import enums
-from google.cloud.gapic.language.v1beta2 import language_service_client
-from google.cloud.proto.language.v1beta2 import language_service_pb2
+from google.cloud import language_v1beta2
+from google.cloud.language_v1beta2 import types
 import six

+# part-of-speech tags from google.cloud.language.enums.PartOfSpeech.Tag
+POS_TAG = ('UNKNOWN', 'ADJ', 'ADP', 'ADV', 'CONJ', 'DET', 'NOUN', 'NUM',
+           'PRON', 'PRT', 'PUNCT', 'VERB', 'X', 'AFFIX')
+

 def sentiment_text(text):
     """Detects sentiment in the text."""
-    language_client = language.Client(api_version='v1beta2')
+    client = language_v1beta2.LanguageServiceClient()

     if isinstance(text, six.binary_type):
         text = text.decode('utf-8')

     # Instantiates a plain text document.
-    document = language_client.document_from_text(text)
+    document = types.Document(content=text, type='PLAIN_TEXT')

     # Detects sentiment in the document. You can also analyze HTML with:
     #   document.doc_type == language.Document.HTML
-    sentiment = document.analyze_sentiment().sentiment
+    sentiment = client.analyze_sentiment(document).document_sentiment

-    print(u'Score: {}'.format(sentiment.score))
-    print(u'Magnitude: {}'.format(sentiment.magnitude))
+    print('Score: {}'.format(sentiment.score))
+    print('Magnitude: {}'.format(sentiment.magnitude))


 def sentiment_file(gcs_uri):
     """Detects sentiment in the file located in Google Cloud Storage."""
-    language_client = language.Client(api_version='v1beta2')
+    client = language_v1beta2.LanguageServiceClient()

     # Instantiates a plain text document.
-    document = language_client.document_from_gcs_url(gcs_uri)
+    document = types.Document(gcs_content_uri=gcs_uri, type='PLAIN_TEXT')

     # Detects sentiment in the document. You can also analyze HTML with:
     #   document.doc_type == language.Document.HTML
-    sentiment = document.analyze_sentiment().sentiment
+    sentiment = client.analyze_sentiment(document).document_sentiment

-    print(u'Score: {}'.format(sentiment.score))
-    print(u'Magnitude: {}'.format(sentiment.magnitude))
+    print('Score: {}'.format(sentiment.score))
+    print('Magnitude: {}'.format(sentiment.magnitude))


 def entities_text(text):
     """Detects entities in the text."""
-    language_client = language.Client(api_version='v1beta2')
+    client = language_v1beta2.LanguageServiceClient()

     if isinstance(text, six.binary_type):
         text = text.decode('utf-8')

     # Instantiates a plain text document.
-    document = language_client.document_from_text(text)
+    document = types.Document(content=text, type='PLAIN_TEXT')

     # Detects entities in the document. You can also analyze HTML with:
     #   document.doc_type == language.Document.HTML
-    entities = document.analyze_entities().entities
+    entities = client.analyze_entities(document).entities

     for entity in entities:
-        print(u'=' * 20)
+        print('=' * 20)
         print(u'{:<16}: {}'.format('name', entity.name))
-        print(u'{:<16}: {}'.format('type', entity.entity_type))
+        print(u'{:<16}: {}'.format('type', entity.type))
         print(u'{:<16}: {}'.format('metadata', entity.metadata))
         print(u'{:<16}: {}'.format('salience', entity.salience))
         print(u'{:<16}: {}'.format('wikipedia_url',
@@ -90,74 +92,74 @@ def entities_text(text):

 def entities_file(gcs_uri):
     """Detects entities in the file located in Google Cloud Storage."""
-    language_client = language.Client(api_version='v1beta2')
+    client = language_v1beta2.LanguageServiceClient()

     # Instantiates a plain text document.
-    document = language_client.document_from_gcs_url(gcs_uri)
+    document = types.Document(gcs_content_uri=gcs_uri, type='PLAIN_TEXT')

     # Detects sentiment in the document. You can also analyze HTML with:
     #   document.doc_type == language.Document.HTML
-    entities = document.analyze_entities().entities
+    entities = client.analyze_entities(document).entities

     for entity in entities:
         print('=' * 20)
-        print('{:<16}: {}'.format('name', entity.name))
-        print('{:<16}: {}'.format('type', entity.entity_type))
-        print('{:<16}: {}'.format('metadata', entity.metadata))
-        print('{:<16}: {}'.format('salience', entity.salience))
-        print('{:<16}: {}'.format('wikipedia_url',
+        print(u'{:<16}: {}'.format('name', entity.name))
+        print(u'{:<16}: {}'.format('type', entity.type))
+        print(u'{:<16}: {}'.format('metadata', entity.metadata))
+        print(u'{:<16}: {}'.format('salience', entity.salience))
+        print(u'{:<16}: {}'.format('wikipedia_url',
               entity.metadata.get('wikipedia_url', '-')))


 def syntax_text(text):
     """Detects syntax in the text."""
-    language_client = language.Client(api_version='v1beta2')
+    client = language_v1beta2.LanguageServiceClient()

     if isinstance(text, six.binary_type):
         text = text.decode('utf-8')

     # Instantiates a plain text document.
-    document = language_client.document_from_text(text)
+    document = types.Document(content=text, type='PLAIN_TEXT')

     # Detects syntax in the document. You can also analyze HTML with:
     #   document.doc_type == language.Document.HTML
-    tokens = document.analyze_syntax().tokens
+    tokens = client.analyze_syntax(document).tokens

     for token in tokens:
-        print(u'{}: {}'.format(token.part_of_speech.tag, token.text_content))
+        print(u'{}: {}'.format(POS_TAG[token.part_of_speech.tag],
+                               token.text.content))


 def syntax_file(gcs_uri):
     """Detects syntax in the file located in Google Cloud Storage."""
-    language_client = language.Client(api_version='v1beta2')
+    client = language_v1beta2.LanguageServiceClient()

     # Instantiates a plain text document.
-    document = language_client.document_from_gcs_url(gcs_uri)
+    document = types.Document(gcs_content_uri=gcs_uri, type='PLAIN_TEXT')

     # Detects syntax in the document. You can also analyze HTML with:
     #   document.doc_type == language.Document.HTML
-    tokens = document.analyze_syntax().tokens
+    tokens = client.analyze_syntax(document).tokens

     for token in tokens:
-        print(u'{}: {}'.format(token.part_of_speech.tag, token.text_content))
+        print(u'{}: {}'.format(POS_TAG[token.part_of_speech.tag],
+                               token.text.content))


 def entity_sentiment_text(text):
     """Detects entity sentiment in the provided text."""
-    language_client = language_service_client.LanguageServiceClient()
-    document = language_service_pb2.Document()
+    client = language_v1beta2.LanguageServiceClient()

     if isinstance(text, six.binary_type):
         text = text.decode('utf-8')

-    document.content = text.encode('utf-8')
-    document.type = enums.Document.Type.PLAIN_TEXT
+    document = types.Document(content=text.encode('utf-8'), type='PLAIN_TEXT')

-    encoding = enums.EncodingType.UTF32
+    encoding = 'UTF32'
     if sys.maxunicode == 65535:
-        encoding = enums.EncodingType.UTF16
+        encoding = 'UTF16'

-    result = language_client.analyze_entity_sentiment(
+    result = client.analyze_entity_sentiment(
         document, encoding)

     for entity in result.entities:
@@ -175,17 +177,15 @@ def entity_sentiment_text(text):

 def entity_sentiment_file(gcs_uri):
     """Detects entity sentiment in a Google Cloud Storage file."""
-    language_client = language_service_client.LanguageServiceClient()
-    document = language_service_pb2.Document()
+    client = language_v1beta2.LanguageServiceClient()

-    document.gcs_content_uri = gcs_uri
-    document.type = enums.Document.Type.PLAIN_TEXT
+    document = types.Document(gcs_content_uri=gcs_uri, type='PLAIN_TEXT')

-    encoding = enums.EncodingType.UTF32
+    encoding = 'UTF32'
Review thread on the line `encoding = 'UTF32'`:

Contributor: can you not use the enums here (should it be types.EncodingType.UTF32)?

Member Author (dizcology): removing encoding, since it is no longer needed: googleapis/googleapis#408

Member Author (@dizcology, Jul 28, 2017): turns out we should still keep the encoding (added back).

The client libraries are currently designed to have all enums in the enums module, such as google.cloud.language.enums.
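
For reference, the two spellings under discussion side by side; a minimal sketch, assuming the GAPIC surface exposes `enums.EncodingType` under `google.cloud.language_v1beta2` (the thread only confirms the enums-module convention, not this exact path):

```python
import sys

from google.cloud.language_v1beta2 import enums  # assumed path, per the thread

# String form, as the diff has it:
encoding = 'UTF32'
if sys.maxunicode == 65535:  # narrow build: string offsets count UTF-16 units
    encoding = 'UTF16'

# Enum form, as the reviewer suggests:
encoding = enums.EncodingType.UTF32
if sys.maxunicode == 65535:
    encoding = enums.EncodingType.UTF16
```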

     if sys.maxunicode == 65535:
-        encoding = enums.EncodingType.UTF16
+        encoding = 'UTF16'

-    result = language_client.analyze_entity_sentiment(
+    result = client.analyze_entity_sentiment(
         document, encoding)

     for entity in result.entities: