[text analytics] add sample stories and improve documents (#15429)

Azure · Nov 18, 2020 · 9f4e92b · 9f4e92b
1 parent 10d4675
commit 9f4e92b
Show file tree

Hide file tree

Showing 21 changed files with 627 additions and 344 deletions.
diff --git a/sdk/textanalytics/azure-ai-textanalytics/README.md b/sdk/textanalytics/azure-ai-textanalytics/README.md
@@ -147,7 +147,7 @@ The input for each operation is passed as a **list** of documents.
 
 Each document can be passed as a string in the list, e.g.
 ```python
-documents = ["I hated the movie. It was so slow!", "The movie made it into my top ten favorites.", "What a great movie!"]
+documents = ["I hated the movie. It was so slow!", "The movie made it into my top ten favorites. What a great movie!"]
 ```
 
 or, if you wish to pass in a per-item document `id` or `language`/`country_hint`, they can be passed as a list of
@@ -158,8 +158,7 @@ or a dict-like representation of the object:
 ```python
 documents = [
     {"id": "1", "language": "en", "text": "I hated the movie. It was so slow!"},
-    {"id": "2", "language": "en", "text": "The movie made it into my top ten favorites."},
-    {"id": "3", "language": "en", "text": "What a great movie!"}
+    {"id": "2", "language": "en", "text": "The movie made it into my top ten favorites. What a great movie!"},
 ]
 ```
 
@@ -210,7 +209,7 @@ endpoint="https://<region>.api.cognitive.microsoft.com/"
 text_analytics_client = TextAnalyticsClient(endpoint, credential)
 
 documents = [
-    "I did not like the restaurant. The food was too spicy.",
+    "I did not like the restaurant. The food was somehow both too spicy and underseasoned. Additionally, I thought the location was too far away from the playhouse.",
     "The restaurant was decorated beautifully. The atmosphere was unlike any other restaurant I've been to.",
     "The food was yummy. :)"
 ]
@@ -244,8 +243,10 @@ endpoint="https://<region>.api.cognitive.microsoft.com/"
 text_analytics_client = TextAnalyticsClient(endpoint, credential)
 
 documents = [
-    "Microsoft was founded by Bill Gates and Paul Allen.",
-    "Redmond is a city in King County, Washington, United States, located 15 miles east of Seattle.",
+    """
+    Microsoft was founded by Bill Gates and Paul Allen. Its headquarters are located in Redmond. Redmond is a
+    city in King County, Washington, United States, located 15 miles east of Seattle.
+    """,
     "Jeff bought three dozen eggs because there was a 50% discount."
 ]
 
@@ -280,7 +281,7 @@ endpoint="https://<region>.api.cognitive.microsoft.com/"
 text_analytics_client = TextAnalyticsClient(endpoint, credential)
 
 documents = [
-    "Microsoft was founded by Bill Gates and Paul Allen.",
+    "Microsoft was founded by Bill Gates and Paul Allen. Its headquarters are located in Redmond.",
     "Easter Island, a Chilean territory, is a remote volcanic island in Polynesia."
 ]
 
@@ -318,8 +319,10 @@ endpoint="https://<region>.api.cognitive.microsoft.com/"
 text_analytics_client = TextAnalyticsClient(endpoint, credential)
 
 documents = [
-    "The employee's SSN is 859-98-0987.",
-    "The employee's phone number is 555-555-5555."
+    """
+    We have an employee called Parker who cleans up after customers. The employee's
+    SSN is 859-98-0987, and their phone number is 555-555-5555.
+    """
 ]
 response = text_analytics_client.recognize_pii_entities(documents, language="en")
 result = [doc for doc in response if not doc.is_error]
@@ -351,8 +354,10 @@ text_analytics_client = TextAnalyticsClient(endpoint, credential)
 
 documents = [
     "Redmond is a city in King County, Washington, United States, located 15 miles east of Seattle.",
-    "I need to take my cat to the veterinarian.",
-    "I will travel to South America in the summer."
+    """
+    I need to take my cat to the veterinarian. He has been sick recently, and I need to take him
+    before I travel to South America for the summer.
+    """,
 ]
 
 response = text_analytics_client.extract_key_phrases(documents, language="en")
@@ -379,7 +384,10 @@ endpoint="https://<region>.api.cognitive.microsoft.com/"
 text_analytics_client = TextAnalyticsClient(endpoint, credential)
 
 documents = [
-    "This is written in English.",
+    """
+    This whole document is written in English. In order for the whole document to be written
+    in English, every sentence also has to be written in English, which it is.
+    """,
     "Il documento scritto in italiano.",
     "Dies ist in deutsche Sprache verfasst."
 ]

diff --git a/...s/azure-ai-textanalytics/samples/async_samples/sample_alternative_document_input_async.py b/...s/azure-ai-textanalytics/samples/async_samples/sample_alternative_document_input_async.py
@@ -37,11 +37,11 @@ async def alternative_document_input(self):
         text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key))
 
         documents = [
-            {"id": "0", "language": "en", "text": "I had the best day of my life."},
-            {"id": "1", "language": "en",
+            {"id": "0", "country_hint": "US", "text": "I had the best day of my life. I decided to go sky-diving and it made me appreciate my whole life so much more. I developed a deep connection with my instructor as well."},
+            {"id": "1", "country_hint": "GB",
              "text": "This was a waste of my time. The speaker put me to sleep."},
-            {"id": "2", "language": "es", "text": "No tengo dinero ni nada que dar..."},
-            {"id": "3", "language": "fr",
+            {"id": "2", "country_hint": "MX", "text": "No tengo dinero ni nada que dar..."},
+            {"id": "3", "country_hint": "FR",
              "text": "L'hôtel n'était pas très confortable. L'éclairage était trop sombre."}
         ]
         async with text_analytics_client:

diff --git a/...tanalytics/azure-ai-textanalytics/samples/async_samples/sample_analyze_sentiment_async.py b/...tanalytics/azure-ai-textanalytics/samples/async_samples/sample_analyze_sentiment_async.py
@@ -13,6 +13,10 @@
     This sample demonstrates how to analyze sentiment in documents.
     An overall and per-sentence sentiment is returned.
 
+    In this sample we will be a skydiving company going through reviews people have left for our company.
+    We will extract the reviews that we are certain have a positive sentiment and post them onto our
+    website to attract more divers.
+
 USAGE:
     python sample_analyze_sentiment_async.py
 
@@ -28,6 +32,14 @@
 class AnalyzeSentimentSampleAsync(object):
 
     async def analyze_sentiment_async(self):
+        print(
+            "In this sample we will be combing through reviews customers have left about their "
+            "experience using our skydiving company, Contoso."
+        )
+        print(
+            "We start out with a list of reviews. Let us extract the reviews we are sure are "
+            "positive, so we can display them on our website and get even more customers!"
+        )
         # [START analyze_sentiment_async]
         from azure.core.credentials import AzureKeyCredential
         from azure.ai.textanalytics.aio import TextAnalyticsClient
@@ -36,38 +48,64 @@ async def analyze_sentiment_async(self):
         key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]
 
         text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key))
+
         documents = [
-            "I had the best day of my life.",
-            "This was a waste of my time. The speaker put me to sleep.",
-            "No tengo dinero ni nada que dar...",
-            "L'hôtel n'était pas très confortable. L'éclairage était trop sombre."
+            """I had the best day of my life. I decided to go sky-diving and it made me appreciate my whole life so much more.
+            I developed a deep connection with my instructor as well, and I feel as if I've made a life-long friend in her.""",
+            """This was a waste of my time. All of the views on this drop are extremely boring, all I saw was grass. 0/10 would
+            not recommend to any divers, even first timers.""",
+            """This was pretty good! The sights were ok, and I had fun with my instructors! Can't complain too much about my experience""",
+            """I only have one word for my experience: WOW!!! I can't believe I have had such a wonderful skydiving company right
+            in my backyard this whole time! I will definitely be a repeat customer, and I want to take my grandmother skydiving too,
+            I know she'll love it!"""
         ]
 
         async with text_analytics_client:
             result = await text_analytics_client.analyze_sentiment(documents)
 
         docs = [doc for doc in result if not doc.is_error]
 
+        print("Let's visualize the sentiment of each of these documents")
         for idx, doc in enumerate(docs):
             print("Document text: {}".format(documents[idx]))
             print("Overall sentiment: {}".format(doc.sentiment))
         # [END analyze_sentiment_async]
-            print("Overall confidence scores: positive={}; neutral={}; negative={} \n".format(
-                doc.confidence_scores.positive,
-                doc.confidence_scores.neutral,
-                doc.confidence_scores.negative,
-            ))
-            for sentence in doc.sentences:
-                print("Sentence '{}' has sentiment: {}".format(sentence.text, sentence.sentiment))
-                print("...Sentence is {} characters from the start of the document and is {} characters long".format(
-                    sentence.offset, len(sentence.text)
-                ))
-                print("...Sentence confidence scores: positive={}; neutral={}; negative={}".format(
-                    sentence.confidence_scores.positive,
-                    sentence.confidence_scores.neutral,
-                    sentence.confidence_scores.negative,
-                ))
-            print("------------------------------------")
+
+        print("Now, let us extract all of the positive reviews")
+        positive_reviews = [doc for doc in docs if doc.sentiment == 'positive']
+
+        print("We want to be very confident that our reviews are positive since we'll be posting them on our website.")
+        print("We're going to confirm our chosen reviews are positive using two different tests.")
+
+        print(
+            "First, we are going to check how confident the sentiment analysis model is that a document is positive. "
+            "Let's go with a 90% confidence."
+        )
+        positive_reviews = [
+            review for review in positive_reviews
+            if review.confidence_scores.positive >= 0.9
+        ]
+
+        print(
+            "Finally, we also want to make sure every sentence is positive so we only showcase our best selves!"
+        )
+        positive_reviews_final = []
+        for idx, review in enumerate(positive_reviews):
+            print("Looking at positive review #{}".format(idx + 1))
+            any_sentence_not_positive = False
+            for sentence in review.sentences:
+                print("...Sentence '{}' has sentiment '{}' with confidence scores '{}'".format(
+                    sentence.text,
+                    sentence.sentiment,
+                    sentence.confidence_scores
+                    )
+                )
+                if sentence.sentiment != 'positive':
+                    any_sentence_not_positive = True
+            if not any_sentence_not_positive:
+                positive_reviews_final.append(review)
+
+        print("We now have the final list of positive reviews we are going to display on our website!")
 
 
 async def main():