Skip to content

Commit

Permalink
[text analytics] return None for offset and length for v3.0 (#13382)
Browse files Browse the repository at this point in the history
  • Loading branch information
iscai-msft authored Aug 28, 2020
1 parent 077e344 commit d25ce0e
Show file tree
Hide file tree
Showing 20 changed files with 681 additions and 15 deletions.
2 changes: 1 addition & 1 deletion sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
- We are now targeting the service's v3.1-preview.1 API as the default. If you would still like to use version v3.0 of the service,
pass in `v3.0` to the kwarg `api_version` when creating your TextAnalyticsClient
- We have added an API `recognize_pii_entities` which returns entities containing personal information for a batch of documents. Only available for API version v3.1-preview.1 and up.
- Added `offset` and `length` properties for `CategorizedEntity`, `SentenceSentiment`, and `LinkedEntityMatch`.
- Added `offset` and `length` properties for `CategorizedEntity`, `SentenceSentiment`, and `LinkedEntityMatch`. These properties are only available for API versions v3.1-preview.1 and up.
- `length` is the number of characters in the text of these models
- `offset` is the offset of the text from the start of the document
- We have now added support for opinion mining. To use this feature, you need to make sure you are using the service's
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
# Licensed under the MIT License.
# ------------------------------------
import re
from ._generated.v3_0.models._models import (
from ._generated.models import (
LanguageInput,
MultiLanguageInput
MultiLanguageInput,
)

from ._generated.v3_0 import models as _v3_0_models

def _get_indices(relation):
return [int(s) for s in re.findall(r"\d+", relation)]

Expand Down Expand Up @@ -207,9 +209,9 @@ class CategorizedEntity(DictMixin):
:ivar subcategory: Entity subcategory, such as Age/Year/TimeRange etc
:vartype subcategory: str
:ivar int offset: The entity text offset from the start of the document.
Returned in unicode code points.
Returned in unicode code points. Only returned for api versions v3.1-preview.1 and up.
:ivar int length: The length of the entity text. Returned
in unicode code points.
in unicode code points. Only returned for api versions v3.1-preview.1 and up.
:ivar confidence_score: Confidence score between 0 and 1 of the extracted
entity.
:vartype confidence_score: float
Expand All @@ -225,12 +227,19 @@ def __init__(self, **kwargs):

@classmethod
def _from_generated(cls, entity):
offset = entity.offset
length = entity.length
if isinstance(entity, _v3_0_models.Entity):
# we do not return offset and length for v3.0 since
# the correct encoding was not introduced for v3.0
offset = None
length = None
return cls(
text=entity.text,
category=entity.category,
subcategory=entity.subcategory,
offset=entity.offset,
length=entity.length,
offset=offset,
length=length,
confidence_score=entity.confidence_score,
)

Expand Down Expand Up @@ -640,9 +649,9 @@ class LinkedEntityMatch(DictMixin):
:vartype confidence_score: float
:ivar text: Entity text as appears in the request.
:ivar int offset: The linked entity match text offset from the start of the document.
Returned in unicode code points.
Returned in unicode code points. Only returned for api versions v3.1-preview.1 and up.
:ivar int length: The length of the linked entity match text. Returned
in unicode code points.
in unicode code points. Only returned for api versions v3.1-preview.1 and up.
:vartype text: str
"""

Expand All @@ -654,11 +663,18 @@ def __init__(self, **kwargs):

@classmethod
def _from_generated(cls, match):
offset = match.offset
length = match.length
if isinstance(match, _v3_0_models.Match):
# we do not return offset and length for v3.0 since
# the correct encoding was not introduced for v3.0
offset = None
length = None
return cls(
confidence_score=match.confidence_score,
text=match.text,
offset=match.offset,
length=match.length
offset=offset,
length=length
)

def __repr__(self):
Expand Down Expand Up @@ -745,9 +761,9 @@ class SentenceSentiment(DictMixin):
:vartype confidence_scores:
~azure.ai.textanalytics.SentimentConfidenceScores
:ivar int offset: The sentence offset from the start of the document. Returned
in unicode code points.
in unicode code points. Only returned for api versions v3.1-preview.1 and up.
:ivar int length: The length of the sentence. Returned
in unicode code points.
in unicode code points. Only returned for api versions v3.1-preview.1 and up.
:ivar mined_opinions: The list of opinions mined from this sentence.
For example in "The food is good, but the service is bad", we would
mine these two opinions "food is good", "service is bad". Only returned
Expand All @@ -766,6 +782,13 @@ def __init__(self, **kwargs):

@classmethod
def _from_generated(cls, sentence, results):
offset = sentence.offset
length = sentence.length
if isinstance(sentence, _v3_0_models.SentenceSentiment):
# we do not return offset and length for v3.0 since
# the correct encoding was not introduced for v3.0
offset = None
length = None
if hasattr(sentence, "aspects"):
mined_opinions = (
[MinedOpinion._from_generated(aspect, results) for aspect in sentence.aspects] # pylint: disable=protected-access
Expand All @@ -777,8 +800,8 @@ def _from_generated(cls, sentence, results):
text=sentence.text,
sentiment=sentence.sentiment,
confidence_scores=SentimentConfidenceScores._from_generated(sentence.confidence_scores), # pylint: disable=protected-access
offset=sentence.offset,
length=sentence.length,
offset=offset,
length=length,
mined_opinions=mined_opinions
)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
interactions:
- request:
body: '{"documents": [{"id": "0", "text": "I like nature. I do not like being
inside", "language": "en"}]}'
headers:
Accept:
- application/json, text/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '99'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.0/sentiment?showStats=false
response:
body:
string: '{"documents":[{"id":"0","sentiment":"mixed","confidenceScores":{"positive":0.44,"neutral":0.27,"negative":0.29},"sentences":[{"sentiment":"positive","confidenceScores":{"positive":0.88,"neutral":0.11,"negative":0.01},"offset":0,"length":14,"text":"I
like nature."},{"sentiment":"negative","confidenceScores":{"positive":0.01,"neutral":0.43,"negative":0.56},"offset":15,"length":26,"text":"I
do not like being inside"}],"warnings":[]}],"errors":[],"modelVersion":"2020-04-01"}'
headers:
apim-request-id:
- 94e0a047-a7be-4d12-a4ec-81ef3f496950
content-type:
- application/json; charset=utf-8
csp-billing-usage:
- CognitiveServices.TextAnalytics.BatchScoring=1
date:
- Thu, 27 Aug 2020 20:56:20 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '78'
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
interactions:
- request:
body: '{"documents": [{"id": "0", "text": "I like nature. I do not like being
inside", "language": "en"}]}'
headers:
Accept:
- application/json, text/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '99'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=UnicodeCodePoint
response:
body:
string: '{"documents":[{"id":"0","sentiment":"mixed","confidenceScores":{"positive":0.44,"neutral":0.27,"negative":0.29},"sentences":[{"sentiment":"positive","confidenceScores":{"positive":0.88,"neutral":0.11,"negative":0.01},"offset":0,"length":14,"text":"I
like nature."},{"sentiment":"negative","confidenceScores":{"positive":0.01,"neutral":0.43,"negative":0.56},"offset":15,"length":26,"text":"I
do not like being inside"}],"warnings":[]}],"errors":[],"modelVersion":"2020-04-01"}'
headers:
apim-request-id:
- c1dc9d16-85c8-420d-95a1-76b21edbb06f
content-type:
- application/json; charset=utf-8
csp-billing-usage:
- CognitiveServices.TextAnalytics.BatchScoring=1
date:
- Fri, 28 Aug 2020 18:31:18 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '81'
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
interactions:
- request:
body: '{"documents": [{"id": "0", "text": "I like nature. I do not like being
inside", "language": "en"}]}'
headers:
Accept:
- application/json, text/json
Content-Length:
- '99'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.0/sentiment?showStats=false
response:
body:
string: '{"documents":[{"id":"0","sentiment":"mixed","confidenceScores":{"positive":0.44,"neutral":0.27,"negative":0.29},"sentences":[{"sentiment":"positive","confidenceScores":{"positive":0.88,"neutral":0.11,"negative":0.01},"offset":0,"length":14,"text":"I
like nature."},{"sentiment":"negative","confidenceScores":{"positive":0.01,"neutral":0.43,"negative":0.56},"offset":15,"length":26,"text":"I
do not like being inside"}],"warnings":[]}],"errors":[],"modelVersion":"2020-04-01"}'
headers:
apim-request-id: 0577ce48-c371-418e-b478-cc085c7ecaf8
content-type: application/json; charset=utf-8
csp-billing-usage: CognitiveServices.TextAnalytics.BatchScoring=1
date: Thu, 27 Aug 2020 20:56:21 GMT
strict-transport-security: max-age=31536000; includeSubDomains; preload
transfer-encoding: chunked
x-content-type-options: nosniff
x-envoy-upstream-service-time: '79'
status:
code: 200
message: OK
url: https://westus2.api.cognitive.microsoft.com//text/analytics/v3.0/sentiment?showStats=false
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
interactions:
- request:
body: '{"documents": [{"id": "0", "text": "I like nature. I do not like being
inside", "language": "en"}]}'
headers:
Accept:
- application/json, text/json
Content-Length:
- '99'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=UnicodeCodePoint
response:
body:
string: '{"documents":[{"id":"0","sentiment":"mixed","confidenceScores":{"positive":0.44,"neutral":0.27,"negative":0.29},"sentences":[{"sentiment":"positive","confidenceScores":{"positive":0.88,"neutral":0.11,"negative":0.01},"offset":0,"length":14,"text":"I
like nature."},{"sentiment":"negative","confidenceScores":{"positive":0.01,"neutral":0.43,"negative":0.56},"offset":15,"length":26,"text":"I
do not like being inside"}],"warnings":[]}],"errors":[],"modelVersion":"2020-04-01"}'
headers:
apim-request-id: 22d88cc1-51fb-48e0-a335-d14b72e1d125
content-type: application/json; charset=utf-8
csp-billing-usage: CognitiveServices.TextAnalytics.BatchScoring=1
date: Fri, 28 Aug 2020 18:31:18 GMT
strict-transport-security: max-age=31536000; includeSubDomains; preload
transfer-encoding: chunked
x-content-type-options: nosniff
x-envoy-upstream-service-time: '92'
status:
code: 200
message: OK
url: https://westus2.api.cognitive.microsoft.com//text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=UnicodeCodePoint
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
interactions:
- request:
body: '{"documents": [{"id": "0", "text": "Microsoft was founded by Bill Gates
and Paul Allen", "language": "en"}]}'
headers:
Accept:
- application/json, text/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '108'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.0/entities/recognition/general?showStats=false
response:
body:
string: '{"documents":[{"id":"0","entities":[{"text":"Microsoft","category":"Organization","offset":0,"length":9,"confidenceScore":0.82},{"text":"Bill
Gates","category":"Person","offset":25,"length":10,"confidenceScore":0.84},{"text":"Paul
Allen","category":"Person","offset":40,"length":10,"confidenceScore":0.89}],"warnings":[]}],"errors":[],"modelVersion":"2020-04-01"}'
headers:
apim-request-id:
- 8ebab42d-0090-4d36-8e52-721f4c4b87d7
content-type:
- application/json; charset=utf-8
csp-billing-usage:
- CognitiveServices.TextAnalytics.BatchScoring=1
date:
- Thu, 27 Aug 2020 20:56:21 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '82'
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
interactions:
- request:
body: '{"documents": [{"id": "0", "text": "Microsoft was founded by Bill Gates
and Paul Allen", "language": "en"}]}'
headers:
Accept:
- application/json, text/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '108'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/entities/recognition/general?showStats=false&stringIndexType=UnicodeCodePoint
response:
body:
string: '{"documents":[{"id":"0","entities":[{"text":"Microsoft","category":"Organization","offset":0,"length":9,"confidenceScore":0.82},{"text":"Bill
Gates","category":"Person","offset":25,"length":10,"confidenceScore":0.84},{"text":"Paul
Allen","category":"Person","offset":40,"length":10,"confidenceScore":0.89}],"warnings":[]}],"errors":[],"modelVersion":"2020-04-01"}'
headers:
apim-request-id:
- c588af7e-ff6c-4bca-9be0-bc50b81df611
content-type:
- application/json; charset=utf-8
csp-billing-usage:
- CognitiveServices.TextAnalytics.BatchScoring=1
date:
- Fri, 28 Aug 2020 18:31:19 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '80'
status:
code: 200
message: OK
version: 1
Loading

0 comments on commit d25ce0e

Please sign in to comment.