From 506270d32c3ab052e81d1a1281d950d6d3776559 Mon Sep 17 00:00:00 2001 From: Trang Nguyen <85181462+tranguyen221@users.noreply.github.com> Date: Mon, 28 Nov 2022 11:14:05 +0100 Subject: [PATCH] Add quickstarted --- .../Evaluate azure text analytics.ipynb | 174 ++---------------- 1 file changed, 19 insertions(+), 155 deletions(-) diff --git a/notebooks/models/Evaluate azure text analytics.ipynb b/notebooks/models/Evaluate azure text analytics.ipynb index af74d75..f7f122d 100644 --- a/notebooks/models/Evaluate azure text analytics.ipynb +++ b/notebooks/models/Evaluate azure text analytics.ipynb @@ -4,31 +4,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Evaluate Azure Text Analytics for PII detection using the Presidio Evaluator framework\n", + "Evaluate Azure Cognitive Service for Language PII detection using the Presidio Evaluator framework\n", "\n", "Prerequisites: \n", - " - Azure subscription\n", + " - Azure subscription - [Create one for free](https://azure.microsoft.com/en-us/free/cognitive-services/)\n", " - Once you have your Azure subscription, create a Language resource in the Azure portal to get your key and endpoint. After it deploys, click Go to resource.\n", " - You'll need the key and endpoint from the resource you create to connect your application to the API. You'll paste your key and endpoint into the code below later in the quickstart.\n", " - You can use the free pricing tier (Free F0) to try the service, and upgrade later to a paid tier for production.\n", - " - To use the Analyze feature, you'll need a Language resource with the standard (S) pricing tier." 
+ " - To use the Analyze feature, you'll need a Language resource with the standard (S) pricing tier.\n", + "\n", + "Azure Cognitive Service for Language quickstart: https://learn.microsoft.com/en-us/azure/cognitive-services/language-service/personally-identifiable-information/quickstart?pivots=programming-language-python" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "stanza and spacy_stanza are not installed\n", - "Flair is not installed by default\n", - "Flair is not installed\n" - ] - } - ], + "outputs": [], "source": [ "from pathlib import Path\n", "from copy import deepcopy\n", @@ -58,45 +50,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "tokenizing input: 0%| | 0/1500 [00:00: 100%|██████████| 1/1 [00:00<00:00, 4.12it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "saving experiment data to experiment_20221128-094558.json\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], + "outputs": [], "source": [ "print(\"Evaluating Azure Text Analytics.\")\n", "\n", @@ -289,20 +177,9 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Confusion matrix:\n", - " Address O\n", - "Address 6 8\n", - "O 0 5\n" - ] - } - ], + "outputs": [], "source": [ "print(\"Confusion matrix:\")\n", "print(pd.DataFrame(confmatrix, columns=entities, index=entities))" @@ -310,22 +187,9 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Precision and recall\n", - " Entity Precision Recall Number of samples\n", - " 
Address 100.00% 42.86% 14\n", - " Organization nan% 0.00% 1\n", - " PII 100.00% 40.00% 15\n", - "PII F measure: 43.61%\n" - ] - } - ], + "outputs": [], "source": [ "print(\"Precision and recall\")\n", "print(results)"