From 9f4e92b5749c81b9357e9d40efbcfa3029fbba3c Mon Sep 17 00:00:00 2001 From: iscai-msft <43154838+iscai-msft@users.noreply.github.com> Date: Wed, 18 Nov 2020 18:17:25 -0500 Subject: [PATCH] [text analytics] add sample stories and improve documents (#15429) --- .../azure-ai-textanalytics/README.md | 32 ++-- ...sample_alternative_document_input_async.py | 8 +- .../sample_analyze_sentiment_async.py | 78 +++++++--- ...yze_sentiment_with_opinion_mining_async.py | 147 ++++++++---------- .../sample_authentication_async.py | 14 +- .../sample_detect_language_async.py | 44 ++++-- .../sample_extract_key_phrases_async.py | 44 ++++-- ..._detailed_diagnostics_information_async.py | 4 +- .../sample_recognize_entities_async.py | 42 +++-- .../sample_recognize_linked_entities_async.py | 47 ++++-- .../sample_recognize_pii_entities_async.py | 41 ++++- .../sample_alternative_document_input.py | 8 +- .../samples/sample_analyze_sentiment.py | 79 +++++++--- ...e_analyze_sentiment_with_opinion_mining.py | 147 ++++++++---------- .../samples/sample_authentication.py | 14 +- .../samples/sample_detect_language.py | 43 +++-- .../samples/sample_extract_key_phrases.py | 44 ++++-- ...le_get_detailed_diagnostics_information.py | 4 +- .../samples/sample_recognize_entities.py | 42 +++-- .../sample_recognize_linked_entities.py | 47 ++++-- .../samples/sample_recognize_pii_entities.py | 42 ++++- 21 files changed, 627 insertions(+), 344 deletions(-) diff --git a/sdk/textanalytics/azure-ai-textanalytics/README.md b/sdk/textanalytics/azure-ai-textanalytics/README.md index e1c663460fa6..e71da6c8d9be 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/README.md +++ b/sdk/textanalytics/azure-ai-textanalytics/README.md @@ -147,7 +147,7 @@ The input for each operation is passed as a **list** of documents. Each document can be passed as a string in the list, e.g. ```python -documents = ["I hated the movie. 
It was so slow!", "The movie made it into my top ten favorites.", "What a great movie!"] +documents = ["I hated the movie. It was so slow!", "The movie made it into my top ten favorites. What a great movie!"] ``` or, if you wish to pass in a per-item document `id` or `language`/`country_hint`, they can be passed as a list of @@ -158,8 +158,7 @@ or a dict-like representation of the object: ```python documents = [ {"id": "1", "language": "en", "text": "I hated the movie. It was so slow!"}, - {"id": "2", "language": "en", "text": "The movie made it into my top ten favorites."}, - {"id": "3", "language": "en", "text": "What a great movie!"} + {"id": "2", "language": "en", "text": "The movie made it into my top ten favorites. What a great movie!"}, ] ``` @@ -210,7 +209,7 @@ endpoint="https://.api.cognitive.microsoft.com/" text_analytics_client = TextAnalyticsClient(endpoint, credential) documents = [ - "I did not like the restaurant. The food was too spicy.", + "I did not like the restaurant. The food was somehow both too spicy and underseasoned. Additionally, I thought the location was too far away from the playhouse.", "The restaurant was decorated beautifully. The atmosphere was unlike any other restaurant I've been to.", "The food was yummy. :)" ] @@ -244,8 +243,10 @@ endpoint="https://.api.cognitive.microsoft.com/" text_analytics_client = TextAnalyticsClient(endpoint, credential) documents = [ - "Microsoft was founded by Bill Gates and Paul Allen.", - "Redmond is a city in King County, Washington, United States, located 15 miles east of Seattle.", + """ + Microsoft was founded by Bill Gates and Paul Allen. Its headquarters are located in Redmond. Redmond is a + city in King County, Washington, United States, located 15 miles east of Seattle. + """, "Jeff bought three dozen eggs because there was a 50% discount." 
] @@ -280,7 +281,7 @@ endpoint="https://.api.cognitive.microsoft.com/" text_analytics_client = TextAnalyticsClient(endpoint, credential) documents = [ - "Microsoft was founded by Bill Gates and Paul Allen.", + "Microsoft was founded by Bill Gates and Paul Allen. Its headquarters are located in Redmond.", "Easter Island, a Chilean territory, is a remote volcanic island in Polynesia." ] @@ -318,8 +319,10 @@ endpoint="https://.api.cognitive.microsoft.com/" text_analytics_client = TextAnalyticsClient(endpoint, credential) documents = [ - "The employee's SSN is 859-98-0987.", - "The employee's phone number is 555-555-5555." + """ + We have an employee called Parker who cleans up after customers. The employee's + SSN is 859-98-0987, and their phone number is 555-555-5555. + """ ] response = text_analytics_client.recognize_pii_entities(documents, language="en") result = [doc for doc in response if not doc.is_error] @@ -351,8 +354,10 @@ text_analytics_client = TextAnalyticsClient(endpoint, credential) documents = [ "Redmond is a city in King County, Washington, United States, located 15 miles east of Seattle.", - "I need to take my cat to the veterinarian.", - "I will travel to South America in the summer." + """ + I need to take my cat to the veterinarian. He has been sick recently, and I need to take him + before I travel to South America for the summer. + """, ] response = text_analytics_client.extract_key_phrases(documents, language="en") @@ -379,7 +384,10 @@ endpoint="https://.api.cognitive.microsoft.com/" text_analytics_client = TextAnalyticsClient(endpoint, credential) documents = [ - "This is written in English.", + """ + This whole document is written in English. In order for the whole document to be written + in English, every sentence also has to be written in English, which it is. + """, "Il documento scritto in italiano.", "Dies ist in deutsche Sprache verfasst." 
] diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_alternative_document_input_async.py b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_alternative_document_input_async.py index d447fe8c7beb..9bcc831839e1 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_alternative_document_input_async.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_alternative_document_input_async.py @@ -37,11 +37,11 @@ async def alternative_document_input(self): text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) documents = [ - {"id": "0", "language": "en", "text": "I had the best day of my life."}, - {"id": "1", "language": "en", + {"id": "0", "country_hint": "US", "text": "I had the best day of my life. I decided to go sky-diving and it made me appreciate my whole life so much more. I developed a deep-connection with my instructor as well."}, + {"id": "1", "country_hint": "GB", "text": "This was a waste of my time. The speaker put me to sleep."}, - {"id": "2", "language": "es", "text": "No tengo dinero ni nada que dar..."}, - {"id": "3", "language": "fr", + {"id": "2", "country_hint": "MX", "text": "No tengo dinero ni nada que dar..."}, + {"id": "3", "country_hint": "FR", "text": "L'hôtel n'était pas très confortable. L'éclairage était trop sombre."} ] async with text_analytics_client: diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_analyze_sentiment_async.py b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_analyze_sentiment_async.py index 5f1c3fe7b89b..2ae4e37a228b 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_analyze_sentiment_async.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_analyze_sentiment_async.py @@ -13,6 +13,10 @@ This sample demonstrates how to analyze sentiment in documents. 
An overall and per-sentence sentiment is returned. + In this sample we will be a skydiving company going through reviews people have left for our company. + We will extract the reviews that we are certain have a positive sentiment and post them onto our + website to attract more divers. + USAGE: python sample_analyze_sentiment_async.py @@ -28,6 +32,14 @@ class AnalyzeSentimentSampleAsync(object): async def analyze_sentiment_async(self): + print( + "In this sample we will be combing through reviews customers have left about their" + "experience using our skydiving company, Contoso." + ) + print( + "We start out with a list of reviews. Let us extract the reviews we are sure are " + "positive, so we can display them on our website and get even more customers!" + ) # [START analyze_sentiment_async] from azure.core.credentials import AzureKeyCredential from azure.ai.textanalytics.aio import TextAnalyticsClient @@ -36,11 +48,16 @@ async def analyze_sentiment_async(self): key = os.environ["AZURE_TEXT_ANALYTICS_KEY"] text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) + documents = [ - "I had the best day of my life.", - "This was a waste of my time. The speaker put me to sleep.", - "No tengo dinero ni nada que dar...", - "L'hôtel n'était pas très confortable. L'éclairage était trop sombre." + """I had the best day of my life. I decided to go sky-diving and it made me appreciate my whole life so much more. + I developed a deep-connection with my instructor as well, and I feel as if I've made a life-long friend in her.""", + """This was a waste of my time. All of the views on this drop are extremely boring, all I saw was grass. 0/10 would + not recommend to any divers, even first timers.""", + """This was pretty good! The sights were ok, and I had fun with my instructors! Can't complain too much about my experience""", + """I only have one word for my experience: WOW!!! 
I can't believe I have had such a wonderful skydiving company right + in my backyard this whole time! I will definitely be a repeat customer, and I want to take my grandmother skydiving too, + I know she'll love it!""" ] async with text_analytics_client: @@ -48,26 +65,47 @@ async def analyze_sentiment_async(self): docs = [doc for doc in result if not doc.is_error] + print("Let's visualize the sentiment of each of these documents") for idx, doc in enumerate(docs): print("Document text: {}".format(documents[idx])) print("Overall sentiment: {}".format(doc.sentiment)) # [END analyze_sentiment_async] - print("Overall confidence scores: positive={}; neutral={}; negative={} \n".format( - doc.confidence_scores.positive, - doc.confidence_scores.neutral, - doc.confidence_scores.negative, - )) - for sentence in doc.sentences: - print("Sentence '{}' has sentiment: {}".format(sentence.text, sentence.sentiment)) - print("...Sentence is {} characters from the start of the document and is {} characters long".format( - sentence.offset, len(sentence.text) - )) - print("...Sentence confidence scores: positive={}; neutral={}; negative={}".format( - sentence.confidence_scores.positive, - sentence.confidence_scores.neutral, - sentence.confidence_scores.negative, - )) - print("------------------------------------") + + print("Now, let us extract all of the positive reviews") + positive_reviews = [doc for doc in docs if doc.sentiment == 'positive'] + + print("We want to be very confident that our reviews are positive since we'll be posting them on our website.") + print("We're going to confirm our chosen reviews are positive using two different tests") + + print( + "First, we are going to check how confident the sentiment analysis model is that a document is positive. " + "Let's go with a 90% confidence." 
+ ) + positive_reviews = [ + review for review in positive_reviews + if review.confidence_scores.positive >= 0.9 + ] + + print( + "Finally, we also want to make sure every sentence is positive so we only showcase our best selves!" + ) + positive_reviews_final = [] + for idx, review in enumerate(positive_reviews): + print("Looking at positive review #{}".format(idx + 1)) + any_sentence_not_positive = False + for sentence in review.sentences: + print("...Sentence '{}' has sentiment '{}' with confidence scores '{}'".format( + sentence.text, + sentence.sentiment, + sentence.confidence_scores + ) + ) + if sentence.sentiment != 'positive': + any_sentence_not_positive = True + if not any_sentence_not_positive: + positive_reviews_final.append(review) + + print("We now have the final list of positive reviews we are going to display on our website!") async def main(): diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_analyze_sentiment_with_opinion_mining_async.py b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_analyze_sentiment_with_opinion_mining_async.py index 058c1752d9c8..500db7f207d8 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_analyze_sentiment_with_opinion_mining_async.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_analyze_sentiment_with_opinion_mining_async.py @@ -14,9 +14,8 @@ opinions from reviews (also known as aspect-based sentiment analysis). This feature is only available for clients with api version v3.1-preview and up. - In this sample, we will be a customer who is trying to figure out whether they should stay - at a specific hotel. We will be looking at which aspects of the hotel are good, and which are - not. + In this sample, we will be a hotel owner looking for complaints users have about our hotel, + in the hopes that we can improve people's experiences. 
USAGE: python sample_analyze_sentiment_with_opinion_mining_async.py @@ -26,55 +25,23 @@ 2) AZURE_TEXT_ANALYTICS_KEY - your Text Analytics subscription key OUTPUT: - In this sample we will be combing through the reviews of a potential hotel to stay at: Hotel Foo. - I first found a handful of reviews for Hotel Foo. Let's see if I want to stay here. + In this sample we will be a hotel owner going through reviews of their hotel to find complaints. + I first found a handful of reviews for my hotel. Let's see what we have to improve. + Let's first see the general sentiment of each of these reviews + ...We have 1 positive reviews, 2 mixed reviews, and 0 negative reviews. - Let's see how many positive and negative reviews of this hotel I have right now - ...We have 3 positive reviews and 2 negative reviews. + Since these reviews seem so mixed, and since I'm interested in finding exactly what it is about my hotel that should be improved, let's find the complaints users have about individual aspects of this hotel - Looks more positive than negative, but still pretty mixed, so I'm going to drill deeper into the opinions of individual aspects of this hotel + In order to do that, I'm going to extract aspects that have a negative sentiment. I'm going to map aspect to the mined opinion object we get back to aggregate the reviews by aspect. 
- In order to do that, I'm going to sort them based on whether these opinions are positive, mixed, or negative + Let's now go through the aspects of our hotel people have complained about and see what users have specifically said + Users have made 1 complaints about 'food', specifically saying that it's 'unacceptable' + Users have made 1 complaints about 'service', specifically saying that it's 'unacceptable' + Users have made 3 complaints about 'toilet', specifically saying that it's 'smelly', 'broken', 'dirty' - Let's look at the 7 positive opinions users have expressed for aspects of this hotel - ...Reviewers have the following opinions for the overall positive 'concierge' aspect of the hotel - ......'positive' opinion 'nice' - ...Reviewers have the following opinions for the overall positive 'AC' aspect of the hotel - ......'positive' opinion 'good' - ......'positive' opinion 'quiet' - ...Reviewers have the following opinions for the overall positive 'breakfast' aspect of the hotel - ......'positive' opinion 'good' - ...Reviewers have the following opinions for the overall positive 'hotel' aspect of the hotel - ......'positive' opinion 'good' - ...Reviewers have the following opinions for the overall positive 'breakfast' aspect of the hotel - ......'positive' opinion 'nice' - ...Reviewers have the following opinions for the overall positive 'shuttle service' aspect of the hotel - ......'positive' opinion 'loved' - ...Reviewers have the following opinions for the overall positive 'view' aspect of the hotel - ......'positive' opinion 'great' - ......'positive' opinion 'unobstructed' - - - Now let's look at the 1 mixed opinions users have expressed for aspects of this hotel - ...Reviewers have the following opinions for the overall mixed 'rooms' aspect of the hotel - ......'positive' opinion 'beautiful' - ......'negative' opinion 'dirty' - - - Finally, let's see the 4 negative opinions users have expressed for aspects of this hotel - ...Reviewers have the 
following opinions for the overall negative 'food' aspect of the hotel - ......'negative' opinion 'unacceptable' - ...Reviewers have the following opinions for the overall negative 'service' aspect of the hotel - ......'negative' opinion 'unacceptable' - ...Reviewers have the following opinions for the overall negative 'elevator' aspect of the hotel - ......'negative' opinion 'broken' - ...Reviewers have the following opinions for the overall negative 'toilet' aspect of the hotel - ......'negative' opinion 'smelly' - - - Looking at the breakdown, even though there were more positive opinions of this hotel, I care the most about the food and the toilets in a hotel, so I will be staying elsewhere + Looking at the breakdown, I can see what aspects of my hotel need improvement, and based off of both the number and content of the complaints users have made about my toilets, I need to get that fixed ASAP. """ import os @@ -94,67 +61,75 @@ async def analyze_sentiment_with_opinion_mining(self): credential=AzureKeyCredential(key) ) - print("In this sample we will be combing through the reviews of a potential hotel to stay at: Hotel Foo.") + print("In this sample we will be a hotel owner going through reviews of their hotel to find complaints.") print( - "I first found a handful of reviews for Hotel Foo. Let's see if I want to stay here." + "I first found a handful of reviews for my hotel. Let's see what we have to improve." ) documents = [ - "The food and service were unacceptable, but the concierge were nice", - "The rooms were beautiful but dirty. The AC was good and quiet, but the elevator was broken", - "The breakfast was good, but the toilet was smelly", - "Loved this hotel - good breakfast - nice shuttle service.", - "I had a great unobstructed view of the Microsoft campus" + """ + The food and service were unacceptable, but the concierge were nice. 
+ After talking to them about the quality of the food and the process to get room service they refunded + the money we spent at the restaurant and gave us a voucher for near by restaurants. + """, + """ + The rooms were beautiful. The AC was good and quiet, which was key for us as outside it was 100F and our baby + was getting uncomfortable because of the heat. The breakfast was good too with good options and good servicing times. + The thing we didn't like was that the toilet in our bathroom was smelly. It could have been that the toilet was broken before we arrived. + Either way it was very uncomfortable. Once we notified the staff, they came and cleaned it and left candles. + """, + """ + Nice rooms! I had a great unobstructed view of the Microsoft campus but bathrooms were old and the toilet was dirty when we arrived. + It was close to bus stops and groceries stores. If you want to be close to campus I will recommend it, otherwise, might be better to stay in a cleaner one + """ ] async with text_analytics_client: result = await text_analytics_client.analyze_sentiment(documents, show_opinion_mining=True) doc_result = [doc for doc in result if not doc.is_error] - print("\n\nLet's see how many positive and negative reviews of this hotel I have right now") + print("\nLet's first see the general sentiment of each of these reviews") positive_reviews = [doc for doc in doc_result if doc.sentiment == "positive"] + mixed_reviews = [doc for doc in doc_result if doc.sentiment == "mixed"] negative_reviews = [doc for doc in doc_result if doc.sentiment == "negative"] - print("...We have {} positive reviews and {} negative reviews. ".format(len(positive_reviews), len(negative_reviews))) - print("\nLooks more positive than negative, but still pretty mixed, so I'm going to drill deeper into the opinions of individual aspects of this hotel") + print("...We have {} positive reviews, {} mixed reviews, and {} negative reviews. 
".format( + len(positive_reviews), len(mixed_reviews), len(negative_reviews) + )) + print( + "\nSince these reviews seem so mixed, and since I'm interested in finding exactly what it is about my hotel that should be improved, " + "let's find the complaints users have about individual aspects of this hotel" + ) - print("\nIn order to do that, I'm going to sort them based on whether these opinions are positive, mixed, or negative") - positive_mined_opinions = [] - mixed_mined_opinions = [] - negative_mined_opinions = [] + print( + "\nIn order to do that, I'm going to extract aspects that have a negative sentiment. " + "I'm going to map aspect to the mined opinion object we get back to aggregate the reviews by aspect. " + ) + aspect_to_complaints = {} for document in doc_result: for sentence in document.sentences: for mined_opinion in sentence.mined_opinions: aspect = mined_opinion.aspect - if aspect.sentiment == "positive": - positive_mined_opinions.append(mined_opinion) - elif aspect.sentiment == "mixed": - mixed_mined_opinions.append(mined_opinion) - else: - negative_mined_opinions.append(mined_opinion) - - print("\n\nLet's look at the {} positive opinions users have expressed for aspects of this hotel".format(len(positive_mined_opinions))) - for mined_opinion in positive_mined_opinions: - print("...Reviewers have the following opinions for the overall positive '{}' aspect of the hotel".format(mined_opinion.aspect.text)) - for opinion in mined_opinion.opinions: - print("......'{}' opinion '{}'".format(opinion.sentiment, opinion.text)) - - print("\n\nNow let's look at the {} mixed opinions users have expressed for aspects of this hotel".format(len(mixed_mined_opinions))) - for mined_opinion in mixed_mined_opinions: - print("...Reviewers have the following opinions for the overall mixed '{}' aspect of the hotel".format(mined_opinion.aspect.text)) - for opinion in mined_opinion.opinions: - print("......'{}' opinion '{}'".format(opinion.sentiment, opinion.text)) - - 
print("\n\nFinally, let's see the {} negative opinions users have expressed for aspects of this hotel".format(len(negative_mined_opinions))) - for mined_opinion in negative_mined_opinions: - print("...Reviewers have the following opinions for the overall negative '{}' aspect of the hotel".format(mined_opinion.aspect.text)) - for opinion in mined_opinion.opinions: - print("......'{}' opinion '{}'".format(opinion.sentiment, opinion.text)) + if aspect.sentiment == 'negative': + aspect_to_complaints.setdefault(aspect.text, []) + aspect_to_complaints[aspect.text].append(mined_opinion) + + print("\nLet's now go through the aspects of our hotel people have complained about and see what users have specifically said") + + for aspect, complaints in aspect_to_complaints.items(): + print("Users have made {} complaint(s) about '{}', specifically saying that it's '{}'".format( + len(complaints), + aspect, + "', '".join( + [opinion.text for complaint in complaints for opinion in complaint.opinions] + ) + )) + print( - "\n\nLooking at the breakdown, even though there were more positive opinions of this hotel, " - "I care the most about the food and the toilets in a hotel, so I will be staying elsewhere" + "\n\nLooking at the breakdown, I can see what aspects of my hotel need improvement, and based off of both the number and " + "content of the complaints users have made about my toilets, I need to get that fixed ASAP." 
) async def main(): diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_authentication_async.py b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_authentication_async.py index f0ac344c75de..2e4f1162e037 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_authentication_async.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_authentication_async.py @@ -47,7 +47,12 @@ async def authentication_with_api_key_credential_async(self): text_analytics_client = TextAnalyticsClient(endpoint, AzureKeyCredential(key)) # [END create_ta_client_with_key_async] - doc = ["I need to take my cat to the veterinarian."] + doc = [ + """ + I need to take my cat to the veterinarian. She's been coughing for a while and I thought it was just a hairball, + but now I'm now worried it might be something else. She's still very healthy so I'm not too worried though. + """ + ] async with text_analytics_client: result = await text_analytics_client.detect_language(doc) @@ -69,7 +74,12 @@ async def authentication_with_azure_active_directory_async(self): text_analytics_client = TextAnalyticsClient(endpoint, credential=credential) # [END create_ta_client_with_aad_async] - doc = ["I need to take my cat to the veterinarian."] + doc = [ + """ + I need to take my cat to the veterinarian. She's been coughing for a while and I thought it was just a hairball, + but now I'm now worried it might be something else. She's still very healthy so I'm not too worried though. 
+ """ + ] async with text_analytics_client: result = await text_analytics_client.detect_language(doc) diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_detect_language_async.py b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_detect_language_async.py index c47cc01c7c3d..02a4b65662ea 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_detect_language_async.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_detect_language_async.py @@ -13,6 +13,10 @@ This sample demonstrates how to detect language in a batch of different documents. + In this sample, we own a hotel with a lot of international clientele. We + are looking to catalog the reviews we have for our hotel by language, so + we can translate these reviews into English. + USAGE: python sample_detect_language_async.py @@ -28,6 +32,11 @@ class DetectLanguageSampleAsync(object): async def detect_language_async(self): + print( + "In this sample we own a hotel with customers from all around the globe. We want to eventually " + "translate these reviews into English so our manager can read them. However, we first need to know which language " + "they are in for more accurate translation. This is the step we will be covering in this sample\n" + ) # [START detect_language_async] from azure.core.credentials import AzureKeyCredential from azure.ai.textanalytics.aio import TextAnalyticsClient @@ -37,24 +46,37 @@ async def detect_language_async(self): text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) documents = [ - "This document is written in English.", - "Este es un document escrito en Español.", - "这是一个用中文写的文件", - "Dies ist ein Dokument in deutsche Sprache.", - "Detta är ett dokument skrivet på engelska." + """ + The concierge Paulette was extremely helpful. 
Sadly when we arrived the elevator was broken, but with Paulette's help we barely noticed this inconvenience. + She arranged for our baggage to be brought up to our room with no extra charge and gave us a free meal to refurbish all of the calories we lost from + walking up the stairs :). Can't say enough good things about my experience! + """, + """ + 最近由于工作压力太大,我们决定去富酒店度假。那儿的温泉实在太舒服了,我跟我丈夫都完全恢复了工作前的青春精神!加油! + """ ] async with text_analytics_client: result = await text_analytics_client.detect_language(documents) - for idx, doc in enumerate(result): - if not doc.is_error: - print("Document text: {}".format(documents[idx])) - print("Language detected: {}".format(doc.primary_language.name)) - print("ISO6391 name: {}".format(doc.primary_language.iso6391_name)) - print("Confidence score: {}\n".format(doc.primary_language.confidence_score)) + reviewed_docs = [doc for doc in result if not doc.is_error] + + print("Let's see what language each review is in!") + + for idx, doc in enumerate(reviewed_docs): + print("Review #{} is in '{}', which has ISO639-1 name '{}'\n".format( + idx, doc.primary_language.name, doc.primary_language.iso6391_name + )) if doc.is_error: print(doc.id, doc.error) # [END detect_language_async] + print( + "When actually storing the reviews, we want to map the review to their ISO639-1 name " + "so everything is more standardized" + ) + + review_to_language = {} + for idx, doc in enumerate(reviewed_docs): + review_to_language[documents[idx]] = doc.primary_language.iso6391_name async def main(): diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_extract_key_phrases_async.py b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_extract_key_phrases_async.py index 26f814e9c0e7..8ea90b01746f 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_extract_key_phrases_async.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_extract_key_phrases_async.py @@ 
-12,6 +12,9 @@ DESCRIPTION: This sample demonstrates how to extract key talking points from a batch of documents. + In this sample, we want to go over articles and read the ones that mention Microsoft. + We're going to use the SDK to create a rudimentary search algorithm to find these articles. + USAGE: python sample_extract_key_phrases_async.py @@ -27,6 +30,10 @@ class ExtractKeyPhrasesSampleAsync(object): async def extract_key_phrases_async(self): + print( + "In this sample, we want to find the articles that mention Microsoft to read." + ) + articles_that_mention_microsoft = [] # [START extract_key_phrases_async] from azure.core.credentials import AzureKeyCredential from azure.ai.textanalytics.aio import TextAnalyticsClient @@ -35,21 +42,40 @@ async def extract_key_phrases_async(self): key = os.environ["AZURE_TEXT_ANALYTICS_KEY"] text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) - documents = [ - "Redmond is a city in King County, Washington, United States, located 15 miles east of Seattle.", - "I need to take my cat to the veterinarian.", - "I will travel to South America in the summer.", + articles = [ + """ + Washington, D.C. Autumn in DC is a uniquely beautiful season. The leaves fall from the trees + in a city chockful of forrests, leaving yellow leaves on the ground and a clearer view of the + blue sky above... + """, + """ + Redmond, WA. In the past few days, Microsoft has decided to further postpone the start date of + its United States workers, due to the pandemic that rages with no end in sight... + """, + """ + Redmond, WA. Employees at Microsoft can be excited about the new coffee shop that will open on campus + once workers no longer have to work remotely... 
+ """ ] async with text_analytics_client: - result = await text_analytics_client.extract_key_phrases(documents) + result = await text_analytics_client.extract_key_phrases(articles) - for doc in result: + for idx, doc in enumerate(result): if not doc.is_error: - print(doc.key_phrases) - if doc.is_error: - print(doc.id, doc.error) + print("Key phrases in article #{}: {}".format( + idx + 1, + ", ".join(doc.key_phrases) + )) # [END extract_key_phrases_async] + if "Microsoft" in doc.key_phrases: + articles_that_mention_microsoft.append(str(idx + 1)) + + print( + "The articles that mention Microsoft are articles number: {}. Those are the ones I'm interested in reading.".format( + ", ".join(articles_that_mention_microsoft) + ) + ) async def main(): diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_get_detailed_diagnostics_information_async.py b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_get_detailed_diagnostics_information_async.py index 1572df2e0f94..fd0689a30a62 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_get_detailed_diagnostics_information_async.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_get_detailed_diagnostics_information_async.py @@ -41,7 +41,9 @@ async def get_detailed_diagnostics_information_async(self): text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key), logging_enable=True) documents = [ - "I had the best day of my life.", + """I had the best day of my life. I decided to go sky-diving and it made me appreciate my whole life so much more. + I developed a deep-connection with my instructor as well. + """, "This was a waste of my time. The speaker put me to sleep.", "No tengo dinero ni nada que dar...", "L'hôtel n'était pas très confortable. L'éclairage était trop sombre." 
diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_recognize_entities_async.py b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_recognize_entities_async.py index 714a6a28914c..70bca058dd20 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_recognize_entities_async.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_recognize_entities_async.py @@ -12,6 +12,8 @@ DESCRIPTION: This sample demonstrates how to recognize named entities in a batch of documents. + In this sample, we own a catering business. We want to sort the reviews for our business + based off of which organization hired us. USAGE: python sample_recognize_entities_async.py @@ -27,6 +29,11 @@ class RecognizeEntitiesSampleAsync(object): async def recognize_entities_async(self): + print( + "In this sample, we are a catering business, and we're looking to sort the reviews " + "for our organization based off of the organization that hired us for catering" + ) + organization_to_reviews = {} # [START recognize_entities_async] from azure.core.credentials import AzureKeyCredential from azure.ai.textanalytics.aio import TextAnalyticsClient @@ -35,25 +42,34 @@ async def recognize_entities_async(self): key = os.environ["AZURE_TEXT_ANALYTICS_KEY"] text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) - documents = [ - "Microsoft was founded by Bill Gates and Paul Allen.", - "I had a wonderful trip to Seattle last week.", - "I visited the Space Needle 2 times.", + reviews = [ + """I work for Foo Company, and we hired Contoso for our annual founding ceremony. The food + was amazing and we all can't say enough good words about the quality and the level of service.""", + """We at the Foo Company re-hired Contoso after all of our past successes with the company. 
+ Though the food was still great, I feel there has been a quality drop since their last time + catering for us. Is anyone else running into the same problem?""", + """Bar Company is over the moon about the service we received from Contoso, the best sliders ever!!!!""" ] async with text_analytics_client: - result = await text_analytics_client.recognize_entities(documents) + result = await text_analytics_client.recognize_entities(reviews) - docs = [doc for doc in result if not doc.is_error] + result = [review for review in result if not review.is_error] - for idx, doc in enumerate(docs): - print("\nDocument text: {}".format(documents[idx])) - for entity in doc.entities: - print("Entity: {}".format(entity.text)) - print("...Category: {}".format(entity.category)) - print("...Confidence Score: {}".format(entity.confidence_score)) - print("...Offset: {}".format(entity.offset)) + for idx, review in enumerate(result): + for entity in review.entities: + print("Entity '{}' has category '{}'".format(entity.text, entity.category)) # [END recognize_entities_async] + if entity.category == 'Organization': + organization_to_reviews.setdefault(entity.text, []) + organization_to_reviews[entity.text].append(reviews[idx]) + + for organization, reviews in organization_to_reviews.items(): + print( + "\n\nOrganization '{}' has left us the following review(s): {}".format( + organization, "\n\n".join(reviews) + ) + ) async def main(): diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_recognize_linked_entities_async.py b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_recognize_linked_entities_async.py index 85384eef0a01..9bac94d62f9b 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_recognize_linked_entities_async.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_recognize_linked_entities_async.py @@ -14,6 +14,10 @@ Each entity found in the document will have a link 
associated with it from a data source.
 
+    In this sample, we are students conducting research for a class project. We want to extract
+    Wikipedia articles for all of the entries listed in our documents, so we can have all possible
+    links extracted out of our research documents.
+
 USAGE:
     python sample_recognize_linked_entities_async.py
 
@@ -29,6 +33,11 @@ class RecognizeLinkedEntitiesSampleAsync(object):
 
     async def recognize_linked_entities_async(self):
+        print(
+            "In this sample, we are students conducting research for a class project. We will extract "
+            "links to Wikipedia articles for all entities listed in our research documents, so we have "
+            "all of the necessary information for research purposes."
+        )
         # [START recognize_linked_entities_async]
         from azure.core.credentials import AzureKeyCredential
         from azure.ai.textanalytics.aio import TextAnalyticsClient
@@ -38,9 +47,13 @@ async def recognize_linked_entities_async(self):
         text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key))
 
         documents = [
-            "Microsoft moved its headquarters to Bellevue, Washington in January 1979.",
-            "Steve Ballmer stepped down as CEO of Microsoft and was succeeded by Satya Nadella.",
-            "Microsoft superó a Apple Inc. como la compañía más valiosa que cotiza en bolsa en el mundo.",
+            """
+            Microsoft was founded by Bill Gates with some friends he met at Harvard. One of his friends,
+            Steve Ballmer, eventually became CEO after Bill Gates as well. Steve Ballmer eventually stepped
+            down as CEO of Microsoft, and was succeeded by Satya Nadella.
+            Microsoft originally moved its headquarters to Bellevue, Washington in January 1979, but is now
+            headquartered in Redmond. 
+ """ ] async with text_analytics_client: @@ -48,20 +61,26 @@ async def recognize_linked_entities_async(self): docs = [doc for doc in result if not doc.is_error] - for idx, doc in enumerate(docs): - print("Document text: {}\n".format(documents[idx])) + print( + "Let's map each entity to it's Wikipedia article. I also want to see how many times each " + "entity is mentioned in a document\n\n" + ) + entity_to_url = {} + for doc in docs: for entity in doc.entities: - print("Entity: {}".format(entity.name)) - print("...URL: {}".format(entity.url)) - print("...Data Source: {}".format(entity.data_source)) - print("...Entity matches:") - for match in entity.matches: - print("......Entity match text: {}".format(match.text)) - print("......Confidence Score: {}".format(match.confidence_score)) - print("......Offset: {}".format(match.offset)) - print("------------------------------------------") + print("Entity '{}' has been mentioned '{}' time(s)".format( + entity.name, len(entity.matches) + )) + if entity.data_source == "Wikipedia": + entity_to_url[entity.name] = entity.url # [END recognize_linked_entities_async] + print("\nNow let's see all of the Wikipedia articles we've extracted from our research documents") + for entity, url in entity_to_url.items(): + print("Link to Wikipedia article for '{}': {}".format( + entity, url + )) + async def main(): sample = RecognizeLinkedEntitiesSampleAsync() diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_recognize_pii_entities_async.py b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_recognize_pii_entities_async.py index 35dc489e98a5..7c580718d212 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_recognize_pii_entities_async.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_recognize_pii_entities_async.py @@ -13,6 +13,9 @@ This sample demonstrates how to recognize personally identifiable information in a batch of 
documents. The endpoint recognize_pii_entities is only available for API version v3.1-preview and up. + In this sample, we will be working for a company that handles loan payments. To follow privacy guidelines, + we need to redact all of our information before we make it public. + USAGE: python sample_recognize_pii_entities_async.py @@ -28,6 +31,12 @@ class RecognizePiiEntitiesSampleAsync(object): async def recognize_pii_entities_async(self): + print( + "In this sample we will be going through our customer's loan payment information and redacting " + "all PII (personally identifable information) before storing this information on our public website. " + "I'm also looking to explicitly extract the SSN information, so I can update my database with SSNs for " + "our customers" + ) # [START recognize_pii_entities_async] from azure.core.credentials import AzureKeyCredential from azure.ai.textanalytics.aio import TextAnalyticsClient @@ -39,9 +48,9 @@ async def recognize_pii_entities_async(self): endpoint=endpoint, credential=AzureKeyCredential(key) ) documents = [ - "The employee's SSN is 859-98-0987.", - "Is 998.214.865-68 your Brazilian CPF number?", - "My phone number is 555-555-5555" + """Parker Doe has repaid all of their loans as of 2020-04-25. + Their SSN is 859-98-0987. To contact them, use their phone number + 555-555-5555. They are originally from Brazil and have Brazilian CPF number 998.214.865-68""" ] async with text_analytics_client: @@ -49,14 +58,34 @@ async def recognize_pii_entities_async(self): docs = [doc for doc in result if not doc.is_error] + print( + "Let's compare the original document with the documents after redaction. 
" + "I also want to comb through all of the entities that got redacted" + ) for idx, doc in enumerate(docs): print("Document text: {}".format(documents[idx])) print("Redacted document text: {}".format(doc.redacted_text)) for entity in doc.entities: - print("...Entity: {}".format(entity.text)) - print("......Category: {}".format(entity.category)) - print("......Confidence Score: {}\n".format(entity.confidence_score)) + print("...Entity '{}' with category '{}' got redacted".format( + entity.text, entity.category + )) # [END recognize_pii_entities_async] + print("All of the information that I expect to be redacted is!") + + print( + "Now I want to explicitly extract SSN information to add to my user SSN database. " + "I also want to be fairly confident that what I'm storing is an SSN, so let's also " + "ensure that we're > 60% positive the entity is a SSN" + ) + ssns = [] + for doc in docs: + for entity in doc.entities: + if entity.category == 'U.S. Social Security Number (SSN)' and entity.confidence_score >= 0.6: + ssns.append(entity.text) + + print("We have extracted the following SSNs as well: '{}'".format( + "', '".join(ssns) + )) async def main(): diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_alternative_document_input.py b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_alternative_document_input.py index d924eae76054..e1f569600aa4 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_alternative_document_input.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_alternative_document_input.py @@ -38,11 +38,11 @@ def alternative_document_input(self): text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) documents = [ - {"id": "0", "language": "en", "text": "I had the best day of my life."}, - {"id": "1", "language": "en", + {"id": "0", "country_hint": "US", "text": "I had the best day of my life. 
I decided to go sky-diving and it made me appreciate my whole life so much more. I developed a deep-connection with my instructor as well."}, + {"id": "1", "country_hint": "GB", "text": "This was a waste of my time. The speaker put me to sleep."}, - {"id": "2", "language": "es", "text": "No tengo dinero ni nada que dar..."}, - {"id": "3", "language": "fr", + {"id": "2", "country_hint": "MX", "text": "No tengo dinero ni nada que dar..."}, + {"id": "3", "country_hint": "FR", "text": "L'hôtel n'était pas très confortable. L'éclairage était trop sombre."} ] diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze_sentiment.py b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze_sentiment.py index 2e2ac1739680..1ff95581e876 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze_sentiment.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze_sentiment.py @@ -13,6 +13,10 @@ This sample demonstrates how to analyze sentiment in documents. An overall and per-sentence sentiment is returned. + In this sample we will be a skydiving company going through reviews people have left for our company. + We will extract the reviews that we are certain have a positive sentiment and post them onto our + website to attract more divers. + USAGE: python sample_analyze_sentiment.py @@ -27,6 +31,15 @@ class AnalyzeSentimentSample(object): def analyze_sentiment(self): + print( + "In this sample we will be combing through reviews customers have left about their" + "experience using our skydiving company, Contoso." + ) + print( + "We start out with a list of reviews. Let us extract the reviews we are sure are " + "positive, so we can display them on our website and get even more customers!" 
+ ) + # [START analyze_sentiment] from azure.core.credentials import AzureKeyCredential from azure.ai.textanalytics import TextAnalyticsClient @@ -35,37 +48,63 @@ def analyze_sentiment(self): key = os.environ["AZURE_TEXT_ANALYTICS_KEY"] text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) + documents = [ - "I had the best day of my life.", - "This was a waste of my time. The speaker put me to sleep.", - "No tengo dinero ni nada que dar...", - "L'hôtel n'était pas très confortable. L'éclairage était trop sombre." + """I had the best day of my life. I decided to go sky-diving and it made me appreciate my whole life so much more. + I developed a deep-connection with my instructor as well, and I feel as if I've made a life-long friend in her.""", + """This was a waste of my time. All of the views on this drop are extremely boring, all I saw was grass. 0/10 would + not recommend to any divers, even first timers.""", + """This was pretty good! The sights were ok, and I had fun with my instructors! Can't complain too much about my experience""", + """I only have one word for my experience: WOW!!! I can't believe I have had such a wonderful skydiving company right + in my backyard this whole time! 
I will definitely be a repeat customer, and I want to take my grandmother skydiving too, + I know she'll love it!""" ] + result = text_analytics_client.analyze_sentiment(documents) docs = [doc for doc in result if not doc.is_error] + print("Let's visualize the sentiment of each of these documents") for idx, doc in enumerate(docs): print("Document text: {}".format(documents[idx])) print("Overall sentiment: {}".format(doc.sentiment)) # [END analyze_sentiment] - print("Overall confidence scores: positive={}; neutral={}; negative={} \n".format( - doc.confidence_scores.positive, - doc.confidence_scores.neutral, - doc.confidence_scores.negative, - )) - for sentence in doc.sentences: - print("Sentence '{}' has sentiment: {}".format(sentence.text, sentence.sentiment)) - print("...Sentence is {} characters from the start of the document and is {} characters long".format( - sentence.offset, len(sentence.text) - )) - print("...Sentence confidence scores: positive={}; neutral={}; negative={}".format( - sentence.confidence_scores.positive, - sentence.confidence_scores.neutral, - sentence.confidence_scores.negative, - )) - print("------------------------------------") + print("Now, let us extract all of the positive reviews") + positive_reviews = [doc for doc in docs if doc.sentiment == 'positive'] + + print("We want to be very confident that our reviews are positive since we'll be posting them on our website.") + print("We're going to confirm our chosen reviews are positive using two different tests") + + print( + "First, we are going to check how confident the sentiment analysis model is that a document is positive. " + "Let's go with a 90% confidence." + ) + positive_reviews = [ + review for review in positive_reviews + if review.confidence_scores.positive >= 0.9 + ] + + print( + "Finally, we also want to make sure every sentence is positive so we only showcase our best selves!" 
+ ) + positive_reviews_final = [] + for idx, review in enumerate(positive_reviews): + print("Looking at positive review #{}".format(idx + 1)) + any_sentence_not_positive = False + for sentence in review.sentences: + print("...Sentence '{}' has sentiment '{}' with confidence scores '{}'".format( + sentence.text, + sentence.sentiment, + sentence.confidence_scores + ) + ) + if sentence.sentiment != 'positive': + any_sentence_not_positive = True + if not any_sentence_not_positive: + positive_reviews_final.append(review) + + print("We now have the final list of positive reviews we are going to display on our website!") if __name__ == '__main__': sample = AnalyzeSentimentSample() diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze_sentiment_with_opinion_mining.py b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze_sentiment_with_opinion_mining.py index e3bfb9628f59..e4c4b3c76dfe 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze_sentiment_with_opinion_mining.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze_sentiment_with_opinion_mining.py @@ -14,9 +14,8 @@ opinions from reviews (also known as aspect-based sentiment analysis). This feature is only available for clients with api version v3.1-preview and up. - In this sample, we will be a customer who is trying to figure out whether they should stay - at a specific hotel. We will be looking at which aspects of the hotel are good, and which are - not. + In this sample, we will be a hotel owner looking for complaints users have about our hotel, + in the hopes that we can improve people's experiences. USAGE: python sample_analyze_sentiment_with_opinion_mining.py @@ -26,55 +25,23 @@ 2) AZURE_TEXT_ANALYTICS_KEY - your Text Analytics subscription key OUTPUT: - In this sample we will be combing through the reviews of a potential hotel to stay at: Hotel Foo. - I first found a handful of reviews for Hotel Foo. Let's see if I want to stay here. 
+ In this sample we will be a hotel owner going through reviews of their hotel to find complaints. + I first found a handful of reviews for my hotel. Let's see what we have to improve. + Let's first see the general sentiment of each of these reviews + ...We have 1 positive reviews, 2 mixed reviews, and 0 negative reviews. - Let's see how many positive and negative reviews of this hotel I have right now - ...We have 3 positive reviews and 2 negative reviews. + Since these reviews seem so mixed, and since I'm interested in finding exactly what it is about my hotel that should be improved, let's find the complaints users have about individual aspects of this hotel - Looks more positive than negative, but still pretty mixed, so I'm going to drill deeper into the opinions of individual aspects of this hotel + In order to do that, I'm going to extract aspects that have a negative sentiment. I'm going to map aspect to the mined opinion object we get back to aggregate the reviews by aspect. - In order to do that, I'm going to sort them based on whether these opinions are positive, mixed, or negative + Let's now go through the aspects of our hotel people have complained about and see what users have specifically said + Users have made 1 complaints about 'food', specifically saying that it's 'unacceptable' + Users have made 1 complaints about 'service', specifically saying that it's 'unacceptable' + Users have made 3 complaints about 'toilet', specifically saying that it's 'smelly', 'broken', 'dirty' - Let's look at the 7 positive opinions users have expressed for aspects of this hotel - ...Reviewers have the following opinions for the overall positive 'concierge' aspect of the hotel - ......'positive' opinion 'nice' - ...Reviewers have the following opinions for the overall positive 'AC' aspect of the hotel - ......'positive' opinion 'good' - ......'positive' opinion 'quiet' - ...Reviewers have the following opinions for the overall positive 'breakfast' aspect of the hotel 
- ......'positive' opinion 'good' - ...Reviewers have the following opinions for the overall positive 'hotel' aspect of the hotel - ......'positive' opinion 'good' - ...Reviewers have the following opinions for the overall positive 'breakfast' aspect of the hotel - ......'positive' opinion 'nice' - ...Reviewers have the following opinions for the overall positive 'shuttle service' aspect of the hotel - ......'positive' opinion 'loved' - ...Reviewers have the following opinions for the overall positive 'view' aspect of the hotel - ......'positive' opinion 'great' - ......'positive' opinion 'unobstructed' - - - Now let's look at the 1 mixed opinions users have expressed for aspects of this hotel - ...Reviewers have the following opinions for the overall mixed 'rooms' aspect of the hotel - ......'positive' opinion 'beautiful' - ......'negative' opinion 'dirty' - - - Finally, let's see the 4 negative opinions users have expressed for aspects of this hotel - ...Reviewers have the following opinions for the overall negative 'food' aspect of the hotel - ......'negative' opinion 'unacceptable' - ...Reviewers have the following opinions for the overall negative 'service' aspect of the hotel - ......'negative' opinion 'unacceptable' - ...Reviewers have the following opinions for the overall negative 'elevator' aspect of the hotel - ......'negative' opinion 'broken' - ...Reviewers have the following opinions for the overall negative 'toilet' aspect of the hotel - ......'negative' opinion 'smelly' - - - Looking at the breakdown, even though there were more positive opinions of this hotel, I care the most about the food and the toilets in a hotel, so I will be staying elsewhere + Looking at the breakdown, I can see what aspects of my hotel need improvement, and based off of both the number and content of the complaints users have made about my toilets, I need to get that fixed ASAP. 
""" import os @@ -93,66 +60,74 @@ def sample_analyze_sentiment_with_opinion_mining(self): credential=AzureKeyCredential(key) ) - print("In this sample we will be combing through the reviews of a potential hotel to stay at: Hotel Foo.") + print("In this sample we will be a hotel owner going through reviews of their hotel to find complaints.") print( - "I first found a handful of reviews for Hotel Foo. Let's see if I want to stay here." + "I first found a handful of reviews for my hotel. Let's see what we have to improve." ) documents = [ - "The food and service were unacceptable, but the concierge were nice", - "The rooms were beautiful but dirty. The AC was good and quiet, but the elevator was broken", - "The breakfast was good, but the toilet was smelly", - "Loved this hotel - good breakfast - nice shuttle service.", - "I had a great unobstructed view of the Microsoft campus" + """ + The food and service were unacceptable, but the concierge were nice. + After talking to them about the quality of the food and the process to get room service they refunded + the money we spent at the restaurant and gave us a voucher for near by restaurants. + """, + """ + The rooms were beautiful. The AC was good and quiet, which was key for us as outside it was 100F and our baby + was getting uncomfortable because of the heat. The breakfast was good too with good options and good servicing times. + The thing we didn't like was that the toilet in our bathroom was smelly. It could have been that the toilet was broken before we arrived. + Either way it was very uncomfortable. Once we notified the staff, they came and cleaned it and left candles. + """, + """ + Nice rooms! I had a great unobstructed view of the Microsoft campus but bathrooms were old and the toilet was dirty when we arrived. + It was close to bus stops and groceries stores. 
If you want to be close to campus I will recommend it, otherwise, might be better to stay in a cleaner one + """ ] result = text_analytics_client.analyze_sentiment(documents, show_opinion_mining=True) doc_result = [doc for doc in result if not doc.is_error] - print("\n\nLet's see how many positive and negative reviews of this hotel I have right now") + print("\nLet's first see the general sentiment of each of these reviews") positive_reviews = [doc for doc in doc_result if doc.sentiment == "positive"] + mixed_reviews = [doc for doc in doc_result if doc.sentiment == "mixed"] negative_reviews = [doc for doc in doc_result if doc.sentiment == "negative"] - print("...We have {} positive reviews and {} negative reviews. ".format(len(positive_reviews), len(negative_reviews))) - print("\nLooks more positive than negative, but still pretty mixed, so I'm going to drill deeper into the opinions of individual aspects of this hotel") + print("...We have {} positive reviews, {} mixed reviews, and {} negative reviews. ".format( + len(positive_reviews), len(mixed_reviews), len(negative_reviews) + )) + print( + "\nSince these reviews seem so mixed, and since I'm interested in finding exactly what it is about my hotel that should be improved, " + "let's find the complaints users have about individual aspects of this hotel" + ) - print("\nIn order to do that, I'm going to sort them based on whether these opinions are positive, mixed, or negative") - positive_mined_opinions = [] - mixed_mined_opinions = [] - negative_mined_opinions = [] + print( + "\nIn order to do that, I'm going to extract aspects that have a negative sentiment. " + "I'm going to map aspect to the mined opinion object we get back to aggregate the reviews by aspect. 
" + ) + aspect_to_complaints = {} for document in doc_result: for sentence in document.sentences: for mined_opinion in sentence.mined_opinions: aspect = mined_opinion.aspect - if aspect.sentiment == "positive": - positive_mined_opinions.append(mined_opinion) - elif aspect.sentiment == "mixed": - mixed_mined_opinions.append(mined_opinion) - else: - negative_mined_opinions.append(mined_opinion) - - print("\n\nLet's look at the {} positive opinions users have expressed for aspects of this hotel".format(len(positive_mined_opinions))) - for mined_opinion in positive_mined_opinions: - print("...Reviewers have the following opinions for the overall positive '{}' aspect of the hotel".format(mined_opinion.aspect.text)) - for opinion in mined_opinion.opinions: - print("......'{}' opinion '{}'".format(opinion.sentiment, opinion.text)) - - print("\n\nNow let's look at the {} mixed opinions users have expressed for aspects of this hotel".format(len(mixed_mined_opinions))) - for mined_opinion in mixed_mined_opinions: - print("...Reviewers have the following opinions for the overall mixed '{}' aspect of the hotel".format(mined_opinion.aspect.text)) - for opinion in mined_opinion.opinions: - print("......'{}' opinion '{}'".format(opinion.sentiment, opinion.text)) - - print("\n\nFinally, let's see the {} negative opinions users have expressed for aspects of this hotel".format(len(negative_mined_opinions))) - for mined_opinion in negative_mined_opinions: - print("...Reviewers have the following opinions for the overall negative '{}' aspect of the hotel".format(mined_opinion.aspect.text)) - for opinion in mined_opinion.opinions: - print("......'{}' opinion '{}'".format(opinion.sentiment, opinion.text)) + if aspect.sentiment == 'negative': + aspect_to_complaints.setdefault(aspect.text, []) + aspect_to_complaints[aspect.text].append(mined_opinion) + + print("\nLet's now go through the aspects of our hotel people have complained about and see what users have specifically said") + + for 
aspect, complaints in aspect_to_complaints.items(): + print("Users have made {} complaint(s) about '{}', specifically saying that it's '{}'".format( + len(complaints), + aspect, + "', '".join( + [opinion.text for complaint in complaints for opinion in complaint.opinions] + ) + )) + print( - "\n\nLooking at the breakdown, even though there were more positive opinions of this hotel, " - "I care the most about the food and the toilets in a hotel, so I will be staying elsewhere" + "\n\nLooking at the breakdown, I can see what aspects of my hotel need improvement, and based off of both the number and " + "content of the complaints users have made about my toilets, I need to get that fixed ASAP." ) diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_authentication.py b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_authentication.py index 0348b40ee163..b369ff0ecc6c 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_authentication.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_authentication.py @@ -46,7 +46,12 @@ def authentication_with_api_key_credential(self): text_analytics_client = TextAnalyticsClient(endpoint, AzureKeyCredential(key)) # [END create_ta_client_with_key] - doc = ["I need to take my cat to the veterinarian."] + doc = [ + """ + I need to take my cat to the veterinarian. She's been coughing for a while and I thought it was just a hairball, + but now I'm now worried it might be something else. She's still very healthy so I'm not too worried though. + """ + ] result = text_analytics_client.detect_language(doc) print("Language detected: {}".format(result[0].primary_language.name)) @@ -67,7 +72,12 @@ def authentication_with_azure_active_directory(self): text_analytics_client = TextAnalyticsClient(endpoint, credential=credential) # [END create_ta_client_with_aad] - doc = ["I need to take my cat to the veterinarian."] + doc = [ + """ + I need to take my cat to the veterinarian. 
She's been coughing for a while and I thought it was just a hairball, + but now I'm now worried it might be something else. She's still very healthy so I'm not too worried though. + """ + ] result = text_analytics_client.detect_language(doc) print("Language detected: {}".format(result[0].primary_language.name)) diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_detect_language.py b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_detect_language.py index 0fa7d0bf4611..d0e006c40f1e 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_detect_language.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_detect_language.py @@ -13,6 +13,10 @@ This sample demonstrates how to detect language in a batch of different documents. + In this sample, we own a hotel with a lot of international clientele. We + are looking to catalog the reviews we have for our hotel by language, so + we can translate these reviews into English. + USAGE: python sample_detect_language.py @@ -27,6 +31,11 @@ class DetectLanguageSample(object): def detect_language(self): + print( + "In this sample we own a hotel with customers from all around the globe. We want to eventually " + "translate these reviews into English so our manager can read them. However, we first need to know which language " + "they are in for more accurate translation. This is the step we will be covering in this sample\n" + ) # [START detect_language] from azure.core.credentials import AzureKeyCredential from azure.ai.textanalytics import TextAnalyticsClient @@ -36,24 +45,36 @@ def detect_language(self): text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) documents = [ - "This document is written in English.", - "Este es un document escrito en Español.", - "这是一个用中文写的文件", - "Dies ist ein Dokument in deutsche Sprache.", - "Detta är ett dokument skrivet på engelska." + """ + The concierge Paulette was extremely helpful. 
Sadly when we arrived the elevator was broken, but with Paulette's help we barely noticed this inconvenience. + She arranged for our baggage to be brought up to our room with no extra charge and gave us a free meal to refurbish all of the calories we lost from + walking up the stairs :). Can't say enough good things about my experience! + """, + """ + 最近由于工作压力太大,我们决定去富酒店度假。那儿的温泉实在太舒服了,我跟我丈夫都完全恢复了工作前的青春精神!加油! + """ ] result = text_analytics_client.detect_language(documents) + reviewed_docs = [doc for doc in result if not doc.is_error] + + print("Let's see what language each review is in!") - for idx, doc in enumerate(result): - if not doc.is_error: - print("Document text: {}".format(documents[idx])) - print("Language detected: {}".format(doc.primary_language.name)) - print("ISO6391 name: {}".format(doc.primary_language.iso6391_name)) - print("Confidence score: {}\n".format(doc.primary_language.confidence_score)) + for idx, doc in enumerate(reviewed_docs): + print("Review #{} is in '{}', which has ISO639-1 name '{}'\n".format( + idx, doc.primary_language.name, doc.primary_language.iso6391_name + )) if doc.is_error: print(doc.id, doc.error) # [END detect_language] + print( + "When actually storing the reviews, we want to map the review to their ISO639-1 name " + "so everything is more standardized" + ) + + review_to_language = {} + for idx, doc in enumerate(reviewed_docs): + review_to_language[documents[idx]] = doc.primary_language.iso6391_name if __name__ == '__main__': diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_extract_key_phrases.py b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_extract_key_phrases.py index 271c7d76d032..58fd2d15b728 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_extract_key_phrases.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_extract_key_phrases.py @@ -12,6 +12,9 @@ DESCRIPTION: This sample demonstrates how to extract key talking points from a batch of documents. 
+ In this sample, we want to go over articles and read the ones that mention Microsoft. + We're going to use the SDK to create a rudimentary search algorithm to find these articles. + USAGE: python sample_extract_key_phrases.py @@ -26,6 +29,10 @@ class ExtractKeyPhrasesSample(object): def extract_key_phrases(self): + print( + "In this sample, we want to find the articles that mention Microsoft to read." + ) + articles_that_mention_microsoft = [] # [START extract_key_phrases] from azure.core.credentials import AzureKeyCredential from azure.ai.textanalytics import TextAnalyticsClient @@ -34,19 +41,38 @@ def extract_key_phrases(self): key = os.environ["AZURE_TEXT_ANALYTICS_KEY"] text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) - documents = [ - "Redmond is a city in King County, Washington, United States, located 15 miles east of Seattle.", - "I need to take my cat to the veterinarian.", - "I will travel to South America in the summer.", + articles = [ + """ + Washington, D.C. Autumn in DC is a uniquely beautiful season. The leaves fall from the trees + in a city chockful of forrests, leaving yellow leaves on the ground and a clearer view of the + blue sky above... + """, + """ + Redmond, WA. In the past few days, Microsoft has decided to further postpone the start date of + its United States workers, due to the pandemic that rages with no end in sight... + """, + """ + Redmond, WA. Employees at Microsoft can be excited about the new coffee shop that will open on campus + once workers no longer have to work remotely... 
+ """ ] - result = text_analytics_client.extract_key_phrases(documents) - for doc in result: + result = text_analytics_client.extract_key_phrases(articles) + for idx, doc in enumerate(result): if not doc.is_error: - print(doc.key_phrases) - if doc.is_error: - print(doc.id, doc.error) + print("Key phrases in article #{}: {}".format( + idx + 1, + ", ".join(doc.key_phrases) + )) # [END extract_key_phrases] + if "Microsoft" in doc.key_phrases: + articles_that_mention_microsoft.append(str(idx + 1)) + + print( + "The articles that mention Microsoft are articles number: {}. Those are the ones I'm interested in reading.".format( + ", ".join(articles_that_mention_microsoft) + ) + ) if __name__ == '__main__': diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_get_detailed_diagnostics_information.py b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_get_detailed_diagnostics_information.py index 00cd971e21c2..a67e879a84ad 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_get_detailed_diagnostics_information.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_get_detailed_diagnostics_information.py @@ -40,7 +40,9 @@ def get_detailed_diagnostics_information(self): text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key), logging_enable=True) documents = [ - "I had the best day of my life.", + """I had the best day of my life. I decided to go sky-diving and it made me appreciate my whole life so much more. + I developed a deep-connection with my instructor as well. + """, "This was a waste of my time. The speaker put me to sleep.", "No tengo dinero ni nada que dar...", "L'hôtel n'était pas très confortable. L'éclairage était trop sombre." 
diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_recognize_entities.py b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_recognize_entities.py index 291f24c76f2c..ad43a5a025c8 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_recognize_entities.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_recognize_entities.py @@ -12,6 +12,8 @@ DESCRIPTION: This sample demonstrates how to recognize named entities in a batch of documents. + In this sample, we own a catering business. We want to sort the reviews for our business + based off of which organization hired us. USAGE: python sample_recognize_entities.py @@ -26,6 +28,11 @@ class RecognizeEntitiesSample(object): def recognize_entities(self): + print( + "In this sample, we are a catering business, and we're looking to sort the reviews " + "for our organization based off of the organization that hired us for catering" + ) + organization_to_reviews = {} # [START recognize_entities] from azure.core.credentials import AzureKeyCredential from azure.ai.textanalytics import TextAnalyticsClient @@ -34,23 +41,32 @@ def recognize_entities(self): key = os.environ["AZURE_TEXT_ANALYTICS_KEY"] text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) - documents = [ - "Microsoft was founded by Bill Gates and Paul Allen.", - "I had a wonderful trip to Seattle last week.", - "I visited the Space Needle 2 times.", + reviews = [ + """I work for Foo Company, and we hired Contoso for our annual founding ceremony. The food + was amazing and we all can't say enough good words about the quality and the level of service.""", + """We at the Foo Company re-hired Contoso after all of our past successes with the company. + Though the food was still great, I feel there has been a quality drop since their last time + catering for us. 
Is anyone else running into the same problem?""", + """Bar Company is over the moon about the service we received from Contoso, the best sliders ever!!!!""" ] - result = text_analytics_client.recognize_entities(documents) - docs = [doc for doc in result if not doc.is_error] + result = text_analytics_client.recognize_entities(reviews) + result = [review for review in result if not review.is_error] - for idx, doc in enumerate(docs): - print("\nDocument text: {}".format(documents[idx])) - for entity in doc.entities: - print("Entity: {}".format(entity.text)) - print("...Category: {}".format(entity.category)) - print("...Confidence Score: {}".format(entity.confidence_score)) - print("...Offset: {}".format(entity.offset)) + for idx, review in enumerate(result): + for entity in review.entities: + print("Entity '{}' has category '{}'".format(entity.text, entity.category)) # [END recognize_entities] + if entity.category == 'Organization': + organization_to_reviews.setdefault(entity.text, []) + organization_to_reviews[entity.text].append(reviews[idx]) + + for organization, reviews in organization_to_reviews.items(): + print( + "\n\nOrganization '{}' has left us the following review(s): {}".format( + organization, "\n\n".join(reviews) + ) + ) if __name__ == '__main__': diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_recognize_linked_entities.py b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_recognize_linked_entities.py index c8e08527df47..20a6aeb05a57 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_recognize_linked_entities.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_recognize_linked_entities.py @@ -14,6 +14,10 @@ Each entity found in the document will have a link associated with it from a data source. + In this sample, we are students conducting research for a class project. 
We want to extract + Wikipedia articles for all of the entries listed in our documents, so we can have all possible + links extracted out of our research documents. + USAGE: python sample_recognize_linked_entities.py @@ -28,6 +32,11 @@ class RecognizeLinkedEntitiesSample(object): def recognize_linked_entities(self): + print( + "In this sample, we are students conducting research for a class project. We will extract " + "links to Wikipedia articles for all entities listed in our research documents, so we have " + "all of the necessary information for research purposes." + ) # [START recognize_linked_entities] from azure.core.credentials import AzureKeyCredential from azure.ai.textanalytics import TextAnalyticsClient @@ -37,28 +46,38 @@ def recognize_linked_entities(self): text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) documents = [ - "Microsoft moved its headquarters to Bellevue, Washington in January 1979.", - "Steve Ballmer stepped down as CEO of Microsoft and was succeeded by Satya Nadella.", - "Microsoft superó a Apple Inc. como la compañía más valiosa que cotiza en bolsa en el mundo.", + """ + Microsoft was founded by Bill Gates with some friends he met at Harvard. One of his friends, + Steve Ballmer, eventually became CEO after Bill Gates as well. Steve Ballmer eventually stepped + down as CEO of Microsoft, and was succeeded by Satya Nadella. + Microsoft originally moved its headquarters to Bellevue, Wahsington in Januaray 1979, but is now + headquartered in Redmond. + """ ] result = text_analytics_client.recognize_linked_entities(documents) docs = [doc for doc in result if not doc.is_error] - for idx, doc in enumerate(docs): - print("Document text: {}\n".format(documents[idx])) + print( + "Let's map each entity to it's Wikipedia article. 
I also want to see how many times each " + "entity is mentioned in a document\n\n" + ) + entity_to_url = {} + for doc in docs: for entity in doc.entities: - print("Entity: {}".format(entity.name)) - print("...URL: {}".format(entity.url)) - print("...Data Source: {}".format(entity.data_source)) - print("...Entity matches:") - for match in entity.matches: - print("......Entity match text: {}".format(match.text)) - print("......Confidence Score: {}".format(match.confidence_score)) - print("......Offset: {}".format(match.offset)) - print("------------------------------------------") + print("Entity '{}' has been mentioned '{}' time(s)".format( + entity.name, len(entity.matches) + )) + if entity.data_source == "Wikipedia": + entity_to_url[entity.name] = entity.url # [END recognize_linked_entities] + print("\nNow let's see all of the Wikipedia articles we've extracted from our research documents") + for entity, url in entity_to_url.items(): + print("Link to Wikipedia article for '{}': {}".format( + entity, url + )) + if __name__ == '__main__': sample = RecognizeLinkedEntitiesSample() diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_recognize_pii_entities.py b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_recognize_pii_entities.py index 3f209ff51878..2516d9a269a6 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_recognize_pii_entities.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_recognize_pii_entities.py @@ -13,6 +13,9 @@ This sample demonstrates how to recognize personally identifiable information in a batch of documents. The endpoint recognize_pii_entities is only available for API version v3.1-preview and up. + In this sample, we will be working for a company that handles loan payments. To follow privacy guidelines, + we need to redact all of our information before we make it public. 
+ USAGE: python sample_recognize_pii_entities.py @@ -27,6 +30,12 @@ class RecognizePiiEntitiesSample(object): def recognize_pii_entities(self): + print( + "In this sample we will be going through our customer's loan payment information and redacting " + "all PII (personally identifable information) before storing this information on our public website. " + "I'm also looking to explicitly extract the SSN information, so I can update my database with SSNs for " + "our customers" + ) # [START recognize_pii_entities] from azure.core.credentials import AzureKeyCredential from azure.ai.textanalytics import TextAnalyticsClient @@ -38,22 +47,43 @@ def recognize_pii_entities(self): endpoint=endpoint, credential=AzureKeyCredential(key) ) documents = [ - "The employee's SSN is 859-98-0987.", - "Is 998.214.865-68 your Brazilian CPF number?", - "My phone number is 555-555-5555" + """Parker Doe has repaid all of their loans as of 2020-04-25. + Their SSN is 859-98-0987. To contact them, use their phone number + 555-555-5555. They are originally from Brazil and have Brazilian CPF number 998.214.865-68""" ] result = text_analytics_client.recognize_pii_entities(documents) docs = [doc for doc in result if not doc.is_error] + print( + "Let's compare the original document with the documents after redaction. 
" + "I also want to comb through all of the entities that got redacted" + ) for idx, doc in enumerate(docs): print("Document text: {}".format(documents[idx])) print("Redacted document text: {}".format(doc.redacted_text)) for entity in doc.entities: - print("...Entity: {}".format(entity.text)) - print("......Category: {}".format(entity.category)) - print("......Confidence Score: {}\n".format(entity.confidence_score)) + print("...Entity '{}' with category '{}' got redacted".format( + entity.text, entity.category + )) + # [END recognize_pii_entities] + print("All of the information that I expect to be redacted is!") + + print( + "Now I want to explicitly extract SSN information to add to my user SSN database. " + "I also want to be fairly confident that what I'm storing is an SSN, so let's also " + "ensure that we're > 60% positive the entity is a SSN" + ) + ssns = [] + for doc in docs: + for entity in doc.entities: + if entity.category == 'U.S. Social Security Number (SSN)' and entity.confidence_score >= 0.6: + ssns.append(entity.text) + + print("We have extracted the following SSNs as well: '{}'".format( + "', '".join(ssns) + )) if __name__ == '__main__':