Skip to content

Commit 7770807

Browse files
authored
Merge pull request #432 from GoogleCloudPlatform/language-gcs
Add GCS examples for Natural Language
2 parents d407c61 + cc6ec31 commit 7770807

File tree

3 files changed

+126
-14
lines changed

3 files changed

+126
-14
lines changed

language/analysis/README.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,12 @@ mvn clean compile assembly:single
3030
```
3131

3232
We can then run the assembled JAR file with the `java` command. The variable $COMMAND takes
33-
three values `entities`, `sentiment` or `syntax`.
33+
one of three values: `entities`, `sentiment`, or `syntax`.
3434

3535
```
3636
MAIN_CLASS=com.google.cloud.language.samples.Analyze
3737
JAR_FILE=target/language-entities-1.0-jar-with-dependencies.jar
38-
java -cp $JAR_FILE $MAIN_CLASS <sentiment|entities|syntax> <text>
38+
java -cp $JAR_FILE $MAIN_CLASS <sentiment|entities|syntax> <text|path>
3939
```
4040

4141
Example usage:
@@ -47,7 +47,9 @@ QUOTE="Larry Page, Google's co-founder, once described the 'perfect search
4747
offer products beyond search, but the spirit of what he said remains."
4848
4949
java -cp $JAR_FILE $MAIN_CLASS entities "$QUOTE"
50+
java -cp $JAR_FILE $MAIN_CLASS entities "gs://bucket/file.txt"
5051
java -cp $JAR_FILE $MAIN_CLASS sentiment "$QUOTE"
52+
java -cp $JAR_FILE $MAIN_CLASS sentiment "gs://bucket/file.txt"
5153
java -cp $JAR_FILE $MAIN_CLASS syntax "$QUOTE"
54+
java -cp $JAR_FILE $MAIN_CLASS syntax "gs://bucket/file.txt"
5255
```
53-

language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java

Lines changed: 57 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,23 @@ public static void main(String[] args) throws IOException, GeneralSecurityExcept
6060
Analyze app = new Analyze(LanguageServiceClient.create());
6161

6262
if (command.equals("entities")) {
63-
printEntities(System.out, app.analyzeEntities(text));
63+
if (text.startsWith("gs://")) {
64+
printEntities(System.out, app.analyzeEntitiesFile(text));
65+
} else {
66+
printEntities(System.out, app.analyzeEntitiesText(text));
67+
}
6468
} else if (command.equals("sentiment")) {
65-
printSentiment(System.out, app.analyzeSentiment(text));
69+
if (text.startsWith("gs://")) {
70+
printSentiment(System.out, app.analyzeSentimentFile(text));
71+
} else {
72+
printSentiment(System.out, app.analyzeSentimentText(text));
73+
}
6674
} else if (command.equals("syntax")) {
67-
printSyntax(System.out, app.analyzeSyntax(text));
75+
if (text.startsWith("gs://")) {
76+
printSyntax(System.out, app.analyzeSyntaxFile(text));
77+
} else {
78+
printSyntax(System.out, app.analyzeSyntaxText(text));
79+
}
6880
}
6981
}
7082

@@ -111,6 +123,9 @@ public static void printSentiment(PrintStream out, Sentiment sentiment) {
111123
out.printf("\tScore: %.3f\n", sentiment.getScore());
112124
}
113125

126+
/**
127+
* Prints the Syntax for the {@code tokens}.
128+
*/
114129
public static void printSyntax(PrintStream out, List<Token> tokens) {
115130
if (tokens == null || tokens.size() == 0) {
116131
out.println("No syntax found");
@@ -153,7 +168,7 @@ public Analyze(LanguageServiceClient languageApi) {
153168
/**
154169
* Gets {@link Entity}s from the string {@code text}.
155170
*/
156-
public List<Entity> analyzeEntities(String text) throws IOException {
171+
public List<Entity> analyzeEntitiesText(String text) throws IOException {
157172
Document doc = Document.newBuilder()
158173
.setContent(text).setType(Type.PLAIN_TEXT).build();
159174
AnalyzeEntitiesRequest request = AnalyzeEntitiesRequest.newBuilder()
@@ -163,20 +178,43 @@ public List<Entity> analyzeEntities(String text) throws IOException {
163178
return response.getEntitiesList();
164179
}
165180

181+
/**
182+
* Gets {@link Entity}s from the contents of the object at the given GCS {@code path}.
183+
*/
184+
public List<Entity> analyzeEntitiesFile(String path) throws IOException {
185+
Document doc = Document.newBuilder()
186+
.setGcsContentUri(path).setType(Type.PLAIN_TEXT).build();
187+
AnalyzeEntitiesRequest request = AnalyzeEntitiesRequest.newBuilder()
188+
.setDocument(doc)
189+
.setEncodingType(EncodingType.UTF16).build();
190+
AnalyzeEntitiesResponse response = languageApi.analyzeEntities(request);
191+
return response.getEntitiesList();
192+
}
193+
166194
/**
167195
* Gets {@link Sentiment} from the string {@code text}.
168196
*/
169-
public Sentiment analyzeSentiment(String text) throws IOException {
197+
public Sentiment analyzeSentimentText(String text) throws IOException {
170198
Document doc = Document.newBuilder()
171199
.setContent(text).setType(Type.PLAIN_TEXT).build();
172200
AnalyzeSentimentResponse response = languageApi.analyzeSentiment(doc);
173201
return response.getDocumentSentiment();
174202
}
175203

204+
/**
205+
* Gets {@link Sentiment} from the contents of the object at the given GCS {@code path}.
206+
*/
207+
public Sentiment analyzeSentimentFile(String path) throws IOException {
208+
Document doc = Document.newBuilder()
209+
.setGcsContentUri(path).setType(Type.PLAIN_TEXT).build();
210+
AnalyzeSentimentResponse response = languageApi.analyzeSentiment(doc);
211+
return response.getDocumentSentiment();
212+
}
213+
176214
/**
177215
* Gets {@link Token}s from the string {@code text}.
178216
*/
179-
public List<Token> analyzeSyntax(String text) throws IOException {
217+
public List<Token> analyzeSyntaxText(String text) throws IOException {
180218
Document doc = Document.newBuilder()
181219
.setContent(text).setType(Type.PLAIN_TEXT).build();
182220
AnalyzeSyntaxRequest request = AnalyzeSyntaxRequest.newBuilder()
@@ -185,4 +223,17 @@ public List<Token> analyzeSyntax(String text) throws IOException {
185223
AnalyzeSyntaxResponse response = languageApi.analyzeSyntax(request);
186224
return response.getTokensList();
187225
}
226+
227+
/**
228+
* Gets {@link Token}s from the contents of the object at the given GCS {@code path}.
229+
*/
230+
public List<Token> analyzeSyntaxFile(String path) throws IOException {
231+
Document doc = Document.newBuilder()
232+
.setGcsContentUri(path).setType(Type.PLAIN_TEXT).build();
233+
AnalyzeSyntaxRequest request = AnalyzeSyntaxRequest.newBuilder()
234+
.setDocument(doc)
235+
.setEncodingType(EncodingType.UTF16).build();
236+
AnalyzeSyntaxResponse response = languageApi.analyzeSyntax(request);
237+
return response.getTokensList();
238+
}
188239
}

language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java

Lines changed: 64 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ public class AnalyzeIT {
4848
@Test public void analyzeEntities_withEntities_returnsLarryPage() throws Exception {
4949
// Act
5050
List<Entity> entities =
51-
analyzeApp.analyzeEntities(
51+
analyzeApp.analyzeEntitiesText(
5252
"Larry Page, Google's co-founder, once described the 'perfect search engine' as"
5353
+ " something that 'understands exactly what you mean and gives you back exactly what"
5454
+ " you want.' Since he spoke those words Google has grown to offer products beyond"
@@ -59,32 +59,75 @@ public class AnalyzeIT {
5959
assertThat(got).named("entity names").contains("Larry Page");
6060
}
6161

62-
@Test public void analyzeSentiment_returnPositive() throws Exception {
62+
@Test public void analyzeEntities_withEntitiesFile_containsGod() throws Exception {
63+
// Act
64+
List<Entity> entities =
65+
analyzeApp.analyzeEntitiesFile("gs://cloud-samples-tests/natural-language/gettysburg.txt");
66+
List<String> got = entities.stream().map(e -> e.getName()).collect(Collectors.toList());
67+
68+
// Assert
69+
assertThat(got).named("entity names").contains("God");
70+
}
71+
72+
@Test public void analyzeSentimentText_returnPositive() throws Exception {
6373
// Act
6474
Sentiment sentiment =
65-
analyzeApp.analyzeSentiment(
75+
analyzeApp.analyzeSentimentText(
6676
"Tom Cruise is one of the finest actors in hollywood and a great star!");
6777

6878
// Assert
6979
assertThat((double)sentiment.getMagnitude()).isGreaterThan(0.0);
7080
assertThat((double)sentiment.getScore()).isGreaterThan(0.0);
7181
}
7282

83+
@Test public void analyzeSentimentFile_returnPositiveFile() throws Exception {
84+
// Act
85+
Sentiment sentiment =
86+
analyzeApp.analyzeSentimentFile("gs://cloud-samples-tests/natural-language/"
87+
+ "sentiment/bladerunner-pos.txt");
88+
89+
// Assert
90+
assertThat((double)sentiment.getMagnitude()).isGreaterThan(0.0);
91+
assertThat((double)sentiment.getScore()).isGreaterThan(0.0);
92+
}
93+
7394
@Test public void analyzeSentiment_returnNegative() throws Exception {
7495
// Act
7596
Sentiment sentiment =
76-
analyzeApp.analyzeSentiment(
97+
analyzeApp.analyzeSentimentText(
7798
"That was the worst performance I've seen in awhile.");
7899

79100
// Assert
80101
assertThat((double)sentiment.getMagnitude()).isGreaterThan(0.0);
81102
assertThat((double)sentiment.getScore()).isLessThan(0.0);
82103
}
83104

105+
@Test public void analyzeSentiment_returnNegativeFile() throws Exception {
106+
// Act
107+
Sentiment sentiment =
108+
analyzeApp.analyzeSentimentFile("gs://cloud-samples-tests/natural-language/"
109+
+ "sentiment/bladerunner-neg.txt");
110+
111+
// Assert
112+
assertThat((double)sentiment.getMagnitude()).isGreaterThan(0.0);
113+
assertThat((double)sentiment.getScore()).isLessThan(0.0);
114+
}
115+
116+
@Test public void analyzeSentiment_returnNeutralFile() throws Exception {
117+
// Act
118+
Sentiment sentiment =
119+
analyzeApp.analyzeSentimentFile("gs://cloud-samples-tests/natural-language/"
120+
+ "sentiment/bladerunner-neutral.txt");
121+
122+
// Assert
123+
assertThat((double)sentiment.getMagnitude()).isGreaterThan(1.0);
124+
assertThat((double)sentiment.getScore()).isWithin(0.1);
125+
}
126+
84127
@Test public void analyzeSyntax_partOfSpeech() throws Exception {
85128
// Act
86129
List<Token> token =
87-
analyzeApp.analyzeSyntax(
130+
analyzeApp.analyzeSyntaxText(
88131
"President Obama was elected for the second term");
89132

90133
List<Tag> got = token.stream().map(e -> e.getPartOfSpeech().getTag())
@@ -94,4 +137,20 @@ public class AnalyzeIT {
94137
assertThat(got).containsExactly(Tag.NOUN, Tag.NOUN, Tag.VERB,
95138
Tag.VERB, Tag.ADP, Tag.DET, Tag.ADJ, Tag.NOUN).inOrder();
96139
}
140+
141+
@Test public void analyzeSyntax_partOfSpeechFile() throws Exception {
142+
// Act
143+
List<Token> token =
144+
analyzeApp.analyzeSyntaxFile("gs://cloud-samples-tests/natural-language/"
145+
+ "sentiment/bladerunner-neutral.txt");
146+
147+
List<Tag> got = token.stream().map(e -> e.getPartOfSpeech().getTag())
148+
.collect(Collectors.toList());
149+
150+
// Assert
151+
assertThat(got).containsExactly(Tag.PRON, Tag.CONJ, Tag.VERB, Tag.CONJ, Tag.VERB,
152+
Tag.DET, Tag.NOUN, Tag.PUNCT, Tag.NOUN, Tag.VERB, Tag.ADJ, Tag.PUNCT, Tag.CONJ,
153+
Tag.ADV, Tag.PRON, Tag.VERB, Tag.VERB, Tag.VERB, Tag.ADJ, Tag.PUNCT, Tag.DET,
154+
Tag.NOUN, Tag.VERB, Tag.ADV, Tag.ADJ,Tag.PUNCT).inOrder();
155+
}
97156
}

0 commit comments

Comments
 (0)