-
Notifications
You must be signed in to change notification settings - Fork 2.9k
Add GCS examples for Natural Language #432
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
81d550f
4baae60
a5f7fc0
bd07872
c701f29
dbe274f
cc6ec31
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -59,12 +59,18 @@ public static void main(String[] args) throws IOException, GeneralSecurityExcept | |
|
|
||
| Analyze app = new Analyze(LanguageServiceClient.create()); | ||
|
|
||
| if (command.equals("entities")) { | ||
| printEntities(System.out, app.analyzeEntities(text)); | ||
| } else if (command.equals("sentiment")) { | ||
| printSentiment(System.out, app.analyzeSentiment(text)); | ||
| } else if (command.equals("syntax")) { | ||
| printSyntax(System.out, app.analyzeSyntax(text)); | ||
| if (command.equals("entities-text")) { | ||
| printEntities(System.out, app.analyzeEntitiesText(text)); | ||
| } else if (command.equals("entities-file")) { | ||
| printEntities(System.out, app.analyzeEntitiesFile(text)); | ||
| } else if (command.equals("sentiment-text")) { | ||
| printSentiment(System.out, app.analyzeSentimentText(text)); | ||
| } else if (command.equals("sentiment-file")) { | ||
| printSentiment(System.out, app.analyzeSentimentFile(text)); | ||
| } else if (command.equals("syntax-text")) { | ||
| printSyntax(System.out, app.analyzeSyntaxText(text)); | ||
| } else if (command.equals("syntax-file")) { | ||
| printSyntax(System.out, app.analyzeSyntaxFile(text)); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -153,7 +159,7 @@ public Analyze(LanguageServiceClient languageApi) { | |
| /** | ||
| * Gets {@link Entity}s from the string {@code text}. | ||
| */ | ||
| public List<Entity> analyzeEntities(String text) throws IOException { | ||
| public List<Entity> analyzeEntitiesText(String text) throws IOException { | ||
| Document doc = Document.newBuilder() | ||
| .setContent(text).setType(Type.PLAIN_TEXT).build(); | ||
| AnalyzeEntitiesRequest request = AnalyzeEntitiesRequest.newBuilder() | ||
|
|
@@ -163,20 +169,43 @@ public List<Entity> analyzeEntities(String text) throws IOException { | |
| return response.getEntitiesList(); | ||
| } | ||
|
|
||
| /** | ||
| * Gets {@link Entity}s from the contents of the object at the given GCS {@code path}.
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Perhaps reword this? Otherwise it sounds like it's looking for entities in the path string itself. (here and below)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
| */ | ||
| public List<Entity> analyzeEntitiesFile(String path) throws IOException { | ||
| Document doc = Document.newBuilder() | ||
| .setGcsContentUri(path).setType(Type.PLAIN_TEXT).build(); | ||
| AnalyzeEntitiesRequest request = AnalyzeEntitiesRequest.newBuilder() | ||
| .setDocument(doc) | ||
| .setEncodingType(EncodingType.UTF16).build(); | ||
| AnalyzeEntitiesResponse response = languageApi.analyzeEntities(request); | ||
| return response.getEntitiesList(); | ||
| } | ||
|
|
||
| /** | ||
| * Gets {@link Sentiment} from the string {@code text}. | ||
| */ | ||
| public Sentiment analyzeSentiment(String text) throws IOException { | ||
| public Sentiment analyzeSentimentText(String text) throws IOException { | ||
| Document doc = Document.newBuilder() | ||
| .setContent(text).setType(Type.PLAIN_TEXT).build(); | ||
| AnalyzeSentimentResponse response = languageApi.analyzeSentiment(doc); | ||
| return response.getDocumentSentiment(); | ||
| } | ||
|
|
||
| /** | ||
| * Gets {@link Sentiment} from the contents of the object at the given GCS {@code path}. | ||
| */ | ||
| public Sentiment analyzeSentimentFile(String path) throws IOException { | ||
| Document doc = Document.newBuilder() | ||
| .setGcsContentUri(path).setType(Type.PLAIN_TEXT).build(); | ||
| AnalyzeSentimentResponse response = languageApi.analyzeSentiment(doc); | ||
| return response.getDocumentSentiment(); | ||
| } | ||
|
|
||
| /** | ||
| * Gets {@link Token}s from the string {@code text}. | ||
| */ | ||
| public List<Token> analyzeSyntax(String text) throws IOException { | ||
| public List<Token> analyzeSyntaxText(String text) throws IOException { | ||
| Document doc = Document.newBuilder() | ||
| .setContent(text).setType(Type.PLAIN_TEXT).build(); | ||
| AnalyzeSyntaxRequest request = AnalyzeSyntaxRequest.newBuilder() | ||
|
|
@@ -185,4 +214,17 @@ public List<Token> analyzeSyntax(String text) throws IOException { | |
| AnalyzeSyntaxResponse response = languageApi.analyzeSyntax(request); | ||
| return response.getTokensList(); | ||
| } | ||
|
|
||
| /** | ||
| * Gets {@link Token}s from the contents of the object at the given GCS {@code path}. | ||
| */ | ||
| public List<Token> analyzeSyntaxFile(String path) throws IOException { | ||
| Document doc = Document.newBuilder() | ||
| .setGcsContentUri(path).setType(Type.PLAIN_TEXT).build(); | ||
| AnalyzeSyntaxRequest request = AnalyzeSyntaxRequest.newBuilder() | ||
| .setDocument(doc) | ||
| .setEncodingType(EncodingType.UTF16).build(); | ||
| AnalyzeSyntaxResponse response = languageApi.analyzeSyntax(request); | ||
| return response.getTokensList(); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -48,7 +48,7 @@ public class AnalyzeIT { | |
| @Test public void analyzeEntities_withEntities_returnsLarryPage() throws Exception { | ||
| // Act | ||
| List<Entity> entities = | ||
| analyzeApp.analyzeEntities( | ||
| analyzeApp.analyzeEntitiesText( | ||
| "Larry Page, Google's co-founder, once described the 'perfect search engine' as" | ||
| + " something that 'understands exactly what you mean and gives you back exactly what" | ||
| + " you want.' Since he spoke those words Google has grown to offer products beyond" | ||
|
|
@@ -59,32 +59,75 @@ public class AnalyzeIT { | |
| assertThat(got).named("entity names").contains("Larry Page"); | ||
| } | ||
|
|
||
| @Test public void analyzeSentiment_returnPositive() throws Exception { | ||
| @Test public void analyzeEntities_withEntitiesFile_containsGod() throws Exception { | ||
| // Act | ||
| List<Entity> entities = | ||
| analyzeApp.analyzeEntitiesFile("gs://cloud-samples-tests/natural-language/gettysburg.txt"); | ||
| List<String> got = entities.stream().map(e -> e.getName()).collect(Collectors.toList()); | ||
|
|
||
| // Assert | ||
| assertThat(got).named("entity names").contains("God"); | ||
| } | ||
|
|
||
| @Test public void analyzeSentimentText_returnPositive() throws Exception { | ||
| // Act | ||
| Sentiment sentiment = | ||
| analyzeApp.analyzeSentiment( | ||
| analyzeApp.analyzeSentimentText( | ||
| "Tom Cruise is one of the finest actors in hollywood and a great star!"); | ||
|
|
||
| // Assert | ||
| assertThat((double)sentiment.getMagnitude()).isGreaterThan(0.0); | ||
| assertThat((double)sentiment.getScore()).isGreaterThan(0.0); | ||
| } | ||
|
|
||
| @Test public void analyzeSentimentFile_returnPositiveFile() throws Exception { | ||
| // Act | ||
| Sentiment sentiment = | ||
| analyzeApp.analyzeSentimentFile("gs://cloud-samples-tests/natural-language/" | ||
| + "sentiment/bladerunner-pos.txt"); | ||
|
|
||
| // Assert | ||
| assertThat((double)sentiment.getMagnitude()).isGreaterThan(0.0); | ||
| assertThat((double)sentiment.getScore()).isGreaterThan(0.0); | ||
| } | ||
|
|
||
| @Test public void analyzeSentiment_returnNegative() throws Exception { | ||
| // Act | ||
| Sentiment sentiment = | ||
| analyzeApp.analyzeSentiment( | ||
| analyzeApp.analyzeSentimentText( | ||
| "That was the worst performance I've seen in awhile."); | ||
|
|
||
| // Assert | ||
| assertThat((double)sentiment.getMagnitude()).isGreaterThan(0.0); | ||
| assertThat((double)sentiment.getScore()).isLessThan(0.0); | ||
| } | ||
|
|
||
| @Test public void analyzeSentiment_returnNegativeFile() throws Exception { | ||
| // Act | ||
| Sentiment sentiment = | ||
| analyzeApp.analyzeSentimentFile("gs://cloud-samples-tests/natural-language/" | ||
| + "sentiment/bladerunner-neg.txt"); | ||
|
|
||
| // Assert | ||
| assertThat((double)sentiment.getMagnitude()).isGreaterThan(0.0); | ||
| assertThat((double)sentiment.getScore()).isLessThan(0.0); | ||
| } | ||
|
|
||
| @Test public void analyzeSentiment_returnNeutralFile() throws Exception { | ||
| // Act | ||
| Sentiment sentiment = | ||
| analyzeApp.analyzeSentimentFile("gs://cloud-samples-tests/natural-language/" | ||
| + "sentiment/bladerunner-neutral.txt"); | ||
|
|
||
| // Assert | ||
| assertThat((double)sentiment.getMagnitude()).isGreaterThan(1.0); | ||
| assertThat((double)sentiment.getScore()).isWithin(0.0); | ||
|
||
| } | ||
|
|
||
| @Test public void analyzeSyntax_partOfSpeech() throws Exception { | ||
| // Act | ||
| List<Token> token = | ||
| analyzeApp.analyzeSyntax( | ||
| analyzeApp.analyzeSyntaxText( | ||
| "President Obama was elected for the second term"); | ||
|
|
||
| List<Tag> got = token.stream().map(e -> e.getPartOfSpeech().getTag()) | ||
|
|
@@ -94,4 +137,20 @@ public class AnalyzeIT { | |
| assertThat(got).containsExactly(Tag.NOUN, Tag.NOUN, Tag.VERB, | ||
| Tag.VERB, Tag.ADP, Tag.DET, Tag.ADJ, Tag.NOUN).inOrder(); | ||
| } | ||
|
|
||
| @Test public void analyzeSyntax_partOfSpeechFile() throws Exception { | ||
| // Act | ||
| List<Token> token = | ||
| analyzeApp.analyzeSyntaxFile("gs://cloud-samples-tests/natural-language/" | ||
| + "sentiment/bladerunner-neutral.txt"); | ||
|
|
||
| List<Tag> got = token.stream().map(e -> e.getPartOfSpeech().getTag()) | ||
| .collect(Collectors.toList()); | ||
|
|
||
| // Assert | ||
| assertThat(got).containsExactly(Tag.PRON, Tag.CONJ, Tag.VERB, Tag.CONJ, Tag.VERB, | ||
| Tag.DET, Tag.NOUN, Tag.PUNCT, Tag.NOUN, Tag.VERB, Tag.ADJ, Tag.PUNCT, Tag.CONJ, | ||
| Tag.ADV, Tag.PRON, Tag.VERB, Tag.VERB, Tag.VERB, Tag.ADJ, Tag.PUNCT, Tag.DET, | ||
| Tag.NOUN, Tag.VERB, Tag.ADV, Tag.ADJ,Tag.PUNCT).inOrder(); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oi. Perhaps it would be cleaner to just detect a GCS URI inside Analyze.java?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That would move us toward writing library code in the snippets, and the other 6 languages have separate snippets for raw strings vs. GCS files, so Java would need to do the same to fit into the docs where the other snippets live.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think he just means in the command parser. I am in favor of simplifying the args parser for a lighter-weight usage string.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yup - yay!