Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[text analytics] add string-index-type support #13378

Merged
merged 8 commits into from
Aug 28, 2020
  •  
  •  
  •  
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,9 @@ class CategorizedEntity(DictMixin):
:ivar subcategory: Entity subcategory, such as Age/Year/TimeRange, etc.
:vartype subcategory: str
:ivar int offset: The entity text offset from the start of the document.
:ivar int length: The length of the entity text.
Returned in unicode code points.
:ivar int length: The length of the entity text. Returned
in unicode code points.
:ivar confidence_score: Confidence score between 0 and 1 of the extracted
entity.
:vartype confidence_score: float
Expand Down Expand Up @@ -253,7 +255,9 @@ class PiiEntity(DictMixin):
:ivar str subcategory: Entity subcategory, such as Credit Card/EU
Phone number/ABA Routing Numbers, etc.
:ivar int offset: The PII entity text offset from the start of the document.
:ivar int length: The length of the PII entity text.
Returned in unicode code points.
:ivar int length: The length of the PII entity text. Returned
in unicode code points.
:ivar float confidence_score: Confidence score between 0 and 1 of the extracted
entity.
"""
Expand Down Expand Up @@ -636,7 +640,9 @@ class LinkedEntityMatch(DictMixin):
:vartype confidence_score: float
:ivar text: Entity text as appears in the request.
:ivar int offset: The linked entity match text offset from the start of the document.
:ivar int length: The length of the linked entity match text.
Returned in unicode code points.
:ivar int length: The length of the linked entity match text. Returned
in unicode code points.
:vartype text: str
"""

Expand Down Expand Up @@ -738,8 +744,10 @@ class SentenceSentiment(DictMixin):
and 1 for the sentence for all labels.
:vartype confidence_scores:
~azure.ai.textanalytics.SentimentConfidenceScores
:ivar int offset: The sentence offset from the start of the document.
:ivar int length: The length of the sentence.
:ivar int offset: The sentence offset from the start of the document. Returned
in unicode code points.
:ivar int length: The length of the sentence. Returned
in unicode code points.
:ivar mined_opinions: The list of opinions mined from this sentence.
For example, in "The food is good, but the service is bad", we would
mine these two opinions: "food is good" and "service is bad". Only returned
Expand Down Expand Up @@ -847,8 +855,10 @@ class AspectSentiment(DictMixin):
for 'neutral' will always be 0
:vartype confidence_scores:
~azure.ai.textanalytics.SentimentConfidenceScores
:ivar int offset: The aspect offset from the start of the document.
:ivar int length: The length of the aspect.
:ivar int offset: The aspect offset from the start of the document. Returned
in unicode code points.
:ivar int length: The length of the aspect. Returned
in unicode code points.
"""

def __init__(self, **kwargs):
Expand Down Expand Up @@ -892,8 +902,10 @@ class OpinionSentiment(DictMixin):
for 'neutral' will always be 0
:vartype confidence_scores:
~azure.ai.textanalytics.SentimentConfidenceScores
:ivar int offset: The opinion offset from the start of the document.
:ivar int length: The length of the opinion.
:ivar int offset: The opinion offset from the start of the document. Returned
in unicode code points.
:ivar int length: The length of the opinion. Returned
in unicode code points.
:ivar bool is_negated: Whether the opinion is negated. For example, in
"The food is not good", the opinion "good" is negated.
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def __init__(self, endpoint, credential, **kwargs):
)
self._default_language = kwargs.pop("default_language", "en")
self._default_country_hint = kwargs.pop("default_country_hint", "US")
self._string_code_unit = None if kwargs.get("api_version") == "v3.0" else "UnicodeCodePoint"

@distributed_trace
def detect_language( # type: ignore
Expand Down Expand Up @@ -213,6 +214,8 @@ def recognize_entities( # type: ignore
docs = _validate_input(documents, "language", language)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_code_unit:
kwargs.update({"string_index_type": self._string_code_unit})
try:
return self._client.entities_recognition_general(
documents=docs,
Expand Down Expand Up @@ -278,6 +281,8 @@ def recognize_pii_entities( # type: ignore
docs = _validate_input(documents, "language", language)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_code_unit:
kwargs.update({"string_index_type": self._string_code_unit})
try:
return self._client.entities_recognition_pii(
documents=docs,
Expand Down Expand Up @@ -350,6 +355,8 @@ def recognize_linked_entities( # type: ignore
docs = _validate_input(documents, "language", language)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_code_unit:
kwargs.update({"string_index_type": self._string_code_unit})
try:
return self._client.entities_linking(
documents=docs,
Expand Down Expand Up @@ -490,6 +497,8 @@ def analyze_sentiment( # type: ignore
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
show_opinion_mining = kwargs.pop("show_opinion_mining", None)
if self._string_code_unit:
kwargs.update({"string_index_type": self._string_code_unit})

if show_opinion_mining is not None:
kwargs.update({"opinion_mining": show_opinion_mining})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def __init__( # type: ignore
)
self._default_language = kwargs.pop("default_language", "en")
self._default_country_hint = kwargs.pop("default_country_hint", "US")
self._string_code_unit = None if kwargs.get("api_version") == "v3.0" else "UnicodeCodePoint"

@distributed_trace_async
async def detect_language( # type: ignore
Expand Down Expand Up @@ -216,6 +217,8 @@ async def recognize_entities( # type: ignore
docs = _validate_input(documents, "language", language)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_code_unit:
kwargs.update({"string_index_type": self._string_code_unit})
try:
return await self._client.entities_recognition_general(
documents=docs,
Expand Down Expand Up @@ -280,6 +283,8 @@ async def recognize_pii_entities( # type: ignore
docs = _validate_input(documents, "language", language)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_code_unit:
kwargs.update({"string_index_type": self._string_code_unit})
try:
return await self._client.entities_recognition_pii(
documents=docs,
Expand Down Expand Up @@ -351,6 +356,8 @@ async def recognize_linked_entities( # type: ignore
docs = _validate_input(documents, "language", language)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_code_unit:
kwargs.update({"string_index_type": self._string_code_unit})
try:
return await self._client.entities_linking(
documents=docs,
Expand Down Expand Up @@ -489,6 +496,8 @@ async def analyze_sentiment( # type: ignore
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
show_opinion_mining = kwargs.pop("show_opinion_mining", None)
if self._string_code_unit:
kwargs.update({"string_index_type": self._string_code_unit})

if show_opinion_mining is not None:
kwargs.update({"opinion_mining": show_opinion_mining})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ interactions:
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=true&stringIndexType=TextElements_v8
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=true&stringIndexType=UnicodeCodePoint
response:
body:
string: '{"statistics":{"documentsCount":3,"validDocumentsCount":3,"erroneousDocumentsCount":0,"transactionsCount":3},"documents":[{"id":"1","sentiment":"neutral","statistics":{"charactersCount":51,"transactionsCount":1},"confidenceScores":{"positive":0.01,"neutral":0.99,"negative":0.0},"sentences":[{"sentiment":"neutral","confidenceScores":{"positive":0.01,"neutral":0.99,"negative":0.0},"offset":0,"length":51,"text":"Microsoft
Expand All @@ -30,21 +30,21 @@ interactions:
recommend you try it."}],"warnings":[]}],"errors":[],"modelVersion":"2020-04-01"}'
headers:
apim-request-id:
- b1e4352f-1e0f-46e3-9f6e-5a82195726b5
- 546ef146-2055-49be-945d-8b4d95870565
content-type:
- application/json; charset=utf-8
csp-billing-usage:
- CognitiveServices.TextAnalytics.BatchScoring=3
date:
- Wed, 26 Aug 2020 21:20:39 GMT
- Thu, 27 Aug 2020 19:31:50 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '91'
- '84'
status:
code: 200
message: OK
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ interactions:
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=TextElements_v8
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=UnicodeCodePoint
response:
body:
string: '{"documents":[{"id":"1","sentiment":"neutral","confidenceScores":{"positive":0.01,"neutral":0.99,"negative":0.0},"sentences":[{"sentiment":"neutral","confidenceScores":{"positive":0.01,"neutral":0.99,"negative":0.0},"offset":0,"length":51,"text":"Microsoft
Expand All @@ -30,21 +30,21 @@ interactions:
recommend you try it."}],"warnings":[]}],"errors":[],"modelVersion":"2020-04-01"}'
headers:
apim-request-id:
- 36f47b42-b805-4655-9cc9-ed373487b586
- ee67d363-828c-4a5b-92ee-4a943a9aa020
content-type:
- application/json; charset=utf-8
csp-billing-usage:
- CognitiveServices.TextAnalytics.BatchScoring=3
date:
- Wed, 26 Aug 2020 21:20:35 GMT
- Thu, 27 Aug 2020 19:31:50 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '83'
- '95'
status:
code: 200
message: OK
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ interactions:
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=TextElements_v8
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=UnicodeCodePoint
response:
body:
string: '{"error":{"code":"401","message":"Access denied due to invalid subscription
Expand All @@ -26,7 +26,7 @@ interactions:
content-length:
- '224'
date:
- Wed, 26 Aug 2020 21:20:35 GMT
- Thu, 27 Aug 2020 19:31:56 GMT
status:
code: 401
message: PermissionDenied
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,26 @@ interactions:
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?model-version=bad&showStats=false&stringIndexType=TextElements_v8
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?model-version=bad&showStats=false&stringIndexType=UnicodeCodePoint
response:
body:
string: '{"error":{"code":"InvalidRequest","message":"Invalid Request.","innererror":{"code":"ModelVersionIncorrect","message":"Invalid
model version. Possible values are: latest,2019-10-01,2020-04-01"}}}'
headers:
apim-request-id:
- e98c3279-f8c4-49ce-b25c-f51289330fdd
- 600cfe88-8c7b-4017-a50e-ef0c30a546a4
content-type:
- application/json; charset=utf-8
date:
- Wed, 26 Aug 2020 21:20:35 GMT
- Thu, 27 Aug 2020 19:31:56 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '10'
- '4'
status:
code: 400
message: Bad Request
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -760,26 +760,26 @@ interactions:
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=TextElements_v8
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=UnicodeCodePoint
response:
body:
string: '{"error":{"code":"InvalidRequest","message":"Invalid document in request.","innererror":{"code":"InvalidDocumentBatch","message":"Batch
request contains too many records. Max 10 records are permitted."}}}'
headers:
apim-request-id:
- 5bcf6f2d-8a67-4bf7-a552-67c0c0ce9f9b
- e63eddb4-ac2c-4b1d-bfa8-ff78dc65076f
content-type:
- application/json; charset=utf-8
date:
- Wed, 26 Aug 2020 21:20:36 GMT
- Thu, 27 Aug 2020 19:31:50 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '13'
- '12'
status:
code: 400
message: Bad Request
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -725,18 +725,18 @@ interactions:
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=TextElements_v8
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=UnicodeCodePoint
response:
body:
string: '{"error":{"code":"InvalidRequest","message":"Invalid document in request.","innererror":{"code":"InvalidDocumentBatch","message":"Batch
request contains too many records. Max 10 records are permitted."}}}'
headers:
apim-request-id:
- 35aa5189-c6e8-46c5-9339-607d86aef6a1
- 22ce0f08-e152-4611-bf63-9cc9ae125568
content-type:
- application/json; charset=utf-8
date:
- Wed, 26 Aug 2020 21:20:39 GMT
- Thu, 27 Aug 2020 19:31:50 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
Expand Down
Loading