Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[formrecognizer] rename input parameters #11518

Merged
merged 1 commit into from
May 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions sdk/formrecognizer/azure-ai-formrecognizer/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
- `FormField` does not have a page_number.
- `begin_recognize_receipts` APIs now return `RecognizedReceipt` instead of `USReceipt`
- `USReceiptType` is renamed to `ReceiptType`
- The `stream` and `url` parameters of the `FormRecognizerClient` methods have been renamed to `form` and `form_url`, respectively.
For the recognize receipt methods, these parameters have instead been renamed to `receipt` and `receipt_url`.



**New features**

Expand Down
2 changes: 1 addition & 1 deletion sdk/formrecognizer/azure-ai-formrecognizer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ model_id = "<your custom model id>"
with open("<path to your form>", "rb") as fd:
form = fd.read()

poller = form_recognizer_client.begin_recognize_custom_forms(model_id=model_id, stream=form)
poller = form_recognizer_client.begin_recognize_custom_forms(model_id=model_id, form=form)
result = poller.result()

for recognized_form in result:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,15 @@ def _receipt_callback(self, raw_response, _, headers): # pylint: disable=unused
return prepare_us_receipt(analyze_result)

@distributed_trace
def begin_recognize_receipts(self, stream, **kwargs):
def begin_recognize_receipts(self, receipt, **kwargs):
# type: (Union[bytes, IO[bytes]], Any) -> LROPoller
"""Extract field text and semantic values from a given US sales receipt.
The input document must be of one of the supported content types - 'application/pdf',
'image/jpeg', 'image/png' or 'image/tiff'.

:param stream: JPEG, PNG, PDF and TIFF type file stream or bytes.
:param receipt: JPEG, PNG, PDF and TIFF type file stream or bytes.
Currently only supports US sales receipts.
:type stream: bytes or IO[bytes]
:type receipt: bytes or IO[bytes]
:keyword bool include_text_content:
Whether or not to include text elements such as lines and words in addition to form fields.
:keyword str content_type: Media type of the body sent to the API. Content-type is
Expand Down Expand Up @@ -116,10 +116,10 @@ def begin_recognize_receipts(self, stream, **kwargs):
include_text_content = kwargs.pop("include_text_content", False)

if content_type is None:
content_type = get_content_type(stream)
content_type = get_content_type(receipt)

return self._client.begin_analyze_receipt_async(
file_stream=stream,
file_stream=receipt,
content_type=content_type,
include_text_details=include_text_content,
cls=kwargs.pop("cls", self._receipt_callback),
Expand All @@ -129,12 +129,12 @@ def begin_recognize_receipts(self, stream, **kwargs):
)

@distributed_trace
def begin_recognize_receipts_from_url(self, url, **kwargs):
def begin_recognize_receipts_from_url(self, receipt_url, **kwargs):
# type: (str, Any) -> LROPoller
"""Extract field text and semantic values from a given US sales receipt.
The input document must be the location (Url) of the receipt to be analyzed.

:param str url: The url of the receipt to analyze. The input must be a valid, encoded url
:param str receipt_url: The url of the receipt to analyze. The input must be a valid, encoded url
of one of the supported formats: JPEG, PNG, PDF and TIFF. Currently only supports
US sales receipts.
:keyword bool include_text_content:
Expand All @@ -160,7 +160,7 @@ def begin_recognize_receipts_from_url(self, url, **kwargs):
include_text_content = kwargs.pop("include_text_content", False)

return self._client.begin_analyze_receipt_async(
file_stream={"source": url},
file_stream={"source": receipt_url},
include_text_details=include_text_content,
cls=kwargs.pop("cls", self._receipt_callback),
polling=LROBasePolling(timeout=polling_interval, **kwargs),
Expand All @@ -173,14 +173,14 @@ def _content_callback(self, raw_response, _, headers): # pylint: disable=unused
return prepare_content_result(analyze_result)

@distributed_trace
def begin_recognize_content(self, stream, **kwargs):
def begin_recognize_content(self, form, **kwargs):
# type: (Union[bytes, IO[bytes]], Any) -> LROPoller
"""Extract text and content/layout information from a given document.
The input document must be of one of the supported content types - 'application/pdf',
'image/jpeg', 'image/png' or 'image/tiff'.

:param stream: JPEG, PNG, PDF and TIFF type file stream or bytes.
:type stream: bytes or IO[bytes]
:param form: JPEG, PNG, PDF and TIFF type file stream or bytes.
:type form: bytes or IO[bytes]
:keyword str content_type: Media type of the body sent to the API. Content-type is
auto-detected, but can be overridden by passing this keyword argument. For options,
see :class:`~azure.ai.formrecognizer.FormContentType`.
Expand All @@ -207,10 +207,10 @@ def begin_recognize_content(self, stream, **kwargs):
raise TypeError("Call begin_recognize_content_from_url() to analyze a document from a url.")

if content_type is None:
content_type = get_content_type(stream)
content_type = get_content_type(form)

return self._client.begin_analyze_layout_async(
file_stream=stream,
file_stream=form,
content_type=content_type,
cls=kwargs.pop("cls", self._content_callback),
polling=LROBasePolling(timeout=polling_interval, **kwargs),
Expand All @@ -219,12 +219,12 @@ def begin_recognize_content(self, stream, **kwargs):
)

@distributed_trace
def begin_recognize_content_from_url(self, url, **kwargs):
def begin_recognize_content_from_url(self, form_url, **kwargs):
# type: (str, Any) -> LROPoller
"""Extract text and layout information from a given document.
The input document must be the location (Url) of the document to be analyzed.

:param str url: The url of the form to analyze. The input must be a valid, encoded url
:param str form_url: The url of the form to analyze. The input must be a valid, encoded url
of one of the supported formats: JPEG, PNG, PDF and TIFF.
:keyword int polling_interval: Waiting time between two polls for LRO operations
if no Retry-After header is present. Defaults to 5 seconds.
Expand All @@ -237,24 +237,24 @@ def begin_recognize_content_from_url(self, url, **kwargs):
polling_interval = kwargs.pop("polling_interval", POLLING_INTERVAL)

return self._client.begin_analyze_layout_async(
file_stream={"source": url},
file_stream={"source": form_url},
cls=kwargs.pop("cls", self._content_callback),
polling=LROBasePolling(timeout=polling_interval, **kwargs),
error_map=error_map,
**kwargs
)

@distributed_trace
def begin_recognize_custom_forms(self, model_id, stream, **kwargs):
def begin_recognize_custom_forms(self, model_id, form, **kwargs):
# type: (str, Union[bytes, IO[bytes]], Any) -> LROPoller
"""Analyze a custom form with a model trained with or without labels. The form
to analyze should be of the same type as the forms that were used to train the model.
The input document must be of one of the supported content types - 'application/pdf',
'image/jpeg', 'image/png' or 'image/tiff'.

:param str model_id: Custom model identifier.
:param stream: JPEG, PNG, PDF and TIFF type file stream or bytes.
:type stream: bytes or IO[bytes]
:param form: JPEG, PNG, PDF and TIFF type file stream or bytes.
:type form: bytes or IO[bytes]
:keyword bool include_text_content:
Whether or not to include text elements such as lines and words in addition to form fields.
:keyword str content_type: Media type of the body sent to the API. Content-type is
Expand Down Expand Up @@ -285,15 +285,15 @@ def begin_recognize_custom_forms(self, model_id, stream, **kwargs):

include_text_content = kwargs.pop("include_text_content", False)
if content_type is None:
content_type = get_content_type(stream)
content_type = get_content_type(form)

def analyze_callback(raw_response, _, headers): # pylint: disable=unused-argument
analyze_result = self._client._deserialize(AnalyzeOperationResult, raw_response)
return prepare_form_result(analyze_result, model_id)

deserialization_callback = cls if cls else analyze_callback
return self._client.begin_analyze_with_custom_model(
file_stream=stream,
file_stream=form,
model_id=model_id,
include_text_details=include_text_content,
content_type=content_type,
Expand All @@ -304,14 +304,14 @@ def analyze_callback(raw_response, _, headers): # pylint: disable=unused-argume
)

@distributed_trace
def begin_recognize_custom_forms_from_url(self, model_id, url, **kwargs):
def begin_recognize_custom_forms_from_url(self, model_id, form_url, **kwargs):
# type: (str, str, Any) -> LROPoller
"""Analyze a custom form with a model trained with or without labels. The form
to analyze should be of the same type as the forms that were used to train the model.
The input document must be the location (Url) of the document to be analyzed.

:param str model_id: Custom model identifier.
:param str url: The url of the form to analyze. The input must be a valid, encoded url
:param str form_url: The url of the form to analyze. The input must be a valid, encoded url
of one of the supported formats: JPEG, PNG, PDF and TIFF.
:keyword bool include_text_content:
Whether or not to include text elements such as lines and words in addition to form fields.
Expand All @@ -333,7 +333,7 @@ def analyze_callback(raw_response, _, headers): # pylint: disable=unused-argume

deserialization_callback = cls if cls else analyze_callback
return self._client.begin_analyze_with_custom_model(
file_stream={"source": url},
file_stream={"source": form_url},
model_id=model_id,
include_text_details=include_text_content,
cls=deserialization_callback,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,16 +89,16 @@ def _receipt_callback(self, raw_response, _, headers): # pylint: disable=unused
@distributed_trace_async
async def recognize_receipts(
self,
stream: Union[bytes, IO[bytes]],
receipt: Union[bytes, IO[bytes]],
**kwargs: Any
) -> List["RecognizedReceipt"]:
"""Extract field text and semantic values from a given US sales receipt.
The input document must be of one of the supported content types - 'application/pdf',
'image/jpeg', 'image/png' or 'image/tiff'.

:param stream: JPEG, PNG, PDF and TIFF type file stream or bytes.
:param receipt: JPEG, PNG, PDF and TIFF type file stream or bytes.
Currently only supports US sales receipts.
:type stream: bytes or IO[bytes]
:type receipt: bytes or IO[bytes]
:keyword bool include_text_content:
Whether or not to include text elements such as lines and words in addition to form fields.
:keyword str content_type: Media type of the body sent to the API. Content-type is
Expand Down Expand Up @@ -128,10 +128,10 @@ async def recognize_receipts(
include_text_content = kwargs.pop("include_text_content", False)

if content_type is None:
content_type = get_content_type(stream)
content_type = get_content_type(receipt)

return await self._client.analyze_receipt_async( # type: ignore
file_stream=stream,
file_stream=receipt,
content_type=content_type,
include_text_details=include_text_content,
cls=kwargs.pop("cls", self._receipt_callback),
Expand All @@ -143,13 +143,13 @@ async def recognize_receipts(
@distributed_trace_async
async def recognize_receipts_from_url(
self,
url: str,
receipt_url: str,
**kwargs: Any
) -> List["RecognizedReceipt"]:
"""Extract field text and semantic values from a given US sales receipt.
The input document must be the location (Url) of the receipt to be analyzed.

:param str url: The url of the receipt to analyze. The input must be a valid, encoded url
:param str receipt_url: The url of the receipt to analyze. The input must be a valid, encoded url
of one of the supported formats: JPEG, PNG, PDF and TIFF. Currently only supports
US sales receipts.
:keyword bool include_text_content:
Expand All @@ -174,7 +174,7 @@ async def recognize_receipts_from_url(
include_text_content = kwargs.pop("include_text_content", False)

return await self._client.analyze_receipt_async( # type: ignore
file_stream={"source": url},
file_stream={"source": receipt_url},
include_text_details=include_text_content,
cls=kwargs.pop("cls", self._receipt_callback),
polling=AsyncLROBasePolling(timeout=polling_interval, **kwargs),
Expand All @@ -187,13 +187,13 @@ def _content_callback(self, raw_response, _, headers): # pylint: disable=unused
return prepare_content_result(analyze_result)

@distributed_trace_async
async def recognize_content(self, stream: Union[bytes, IO[bytes]], **kwargs: Any) -> List["FormPage"]:
async def recognize_content(self, form: Union[bytes, IO[bytes]], **kwargs: Any) -> List["FormPage"]:
"""Extract text and content/layout information from a given document.
The input document must be of one of the supported content types - 'application/pdf',
'image/jpeg', 'image/png' or 'image/tiff'.

:param stream: JPEG, PNG, PDF and TIFF type file stream or bytes.
:type stream: bytes or IO[bytes]
:param form: JPEG, PNG, PDF and TIFF type file stream or bytes.
:type form: bytes or IO[bytes]
:keyword str content_type: Media type of the body sent to the API. Content-type is
auto-detected, but can be overridden by passing this keyword argument. For options,
see :class:`~azure.ai.formrecognizer.FormContentType`.
Expand All @@ -219,10 +219,10 @@ async def recognize_content(self, stream: Union[bytes, IO[bytes]], **kwargs: Any
raise TypeError("Call begin_recognize_content_from_url() to analyze a document from a url.")

if content_type is None:
content_type = get_content_type(stream)
content_type = get_content_type(form)

return await self._client.analyze_layout_async( # type: ignore
file_stream=stream,
file_stream=form,
content_type=content_type,
cls=kwargs.pop("cls", self._content_callback),
polling=AsyncLROBasePolling(timeout=polling_interval, **kwargs),
Expand All @@ -231,11 +231,11 @@ async def recognize_content(self, stream: Union[bytes, IO[bytes]], **kwargs: Any
)

@distributed_trace_async
async def recognize_content_from_url(self, url: str, **kwargs: Any) -> List["FormPage"]:
async def recognize_content_from_url(self, form_url: str, **kwargs: Any) -> List["FormPage"]:
"""Extract text and layout information from a given document.
The input document must be the location (Url) of the document to be analyzed.

:param str url: The url of the form to analyze. The input must be a valid, encoded url
:param str form_url: The url of the form to analyze. The input must be a valid, encoded url
of one of the supported formats: JPEG, PNG, PDF and TIFF.
:keyword int polling_interval: Waiting time between two polls for LRO operations
if no Retry-After header is present. Defaults to 5 seconds.
Expand All @@ -246,7 +246,7 @@ async def recognize_content_from_url(self, url: str, **kwargs: Any) -> List["For

polling_interval = kwargs.pop("polling_interval", POLLING_INTERVAL)
return await self._client.analyze_layout_async( # type: ignore
file_stream={"source": url},
file_stream={"source": form_url},
cls=kwargs.pop("cls", self._content_callback),
polling=AsyncLROBasePolling(timeout=polling_interval, **kwargs),
error_map=error_map,
Expand All @@ -257,7 +257,7 @@ async def recognize_content_from_url(self, url: str, **kwargs: Any) -> List["For
async def recognize_custom_forms(
self,
model_id: str,
stream: Union[bytes, IO[bytes]],
form: Union[bytes, IO[bytes]],
**kwargs: Any
) -> List["RecognizedForm"]:
"""Analyze a custom form with a model trained with or without labels. The form
Expand All @@ -266,8 +266,8 @@ async def recognize_custom_forms(
'image/jpeg', 'image/png' or 'image/tiff'.

:param str model_id: Custom model identifier.
:param stream: JPEG, PNG, PDF and TIFF type file stream or bytes.
:type stream: bytes or IO[bytes]
:param form: JPEG, PNG, PDF and TIFF type file stream or bytes.
:type form: bytes or IO[bytes]
:keyword bool include_text_content:
Whether or not to include text elements such as lines and words in addition to form fields.
:keyword str content_type: Media type of the body sent to the API. Content-type is
Expand Down Expand Up @@ -298,15 +298,15 @@ async def recognize_custom_forms(
include_text_content = kwargs.pop("include_text_content", False)

if content_type is None:
content_type = get_content_type(stream)
content_type = get_content_type(form)

def analyze_callback(raw_response, _, headers): # pylint: disable=unused-argument
analyze_result = self._client._deserialize(AnalyzeOperationResult, raw_response)
return prepare_form_result(analyze_result, model_id)

deserialization_callback = cls if cls else analyze_callback
return await self._client.analyze_with_custom_model( # type: ignore
file_stream=stream,
file_stream=form,
model_id=model_id,
include_text_details=include_text_content,
content_type=content_type,
Expand All @@ -320,15 +320,15 @@ def analyze_callback(raw_response, _, headers): # pylint: disable=unused-argume
async def recognize_custom_forms_from_url(
self,
model_id: str,
url: str,
form_url: str,
**kwargs: Any
) -> List["RecognizedForm"]:
"""Analyze a custom form with a model trained with or without labels. The form
to analyze should be of the same type as the forms that were used to train the model.
The input document must be the location (Url) of the document to be analyzed.

:param str model_id: Custom model identifier.
:param str url: The url of the form to analyze. The input must be a valid, encoded url
:param str form_url: The url of the form to analyze. The input must be a valid, encoded url
of one of the supported formats: JPEG, PNG, PDF and TIFF.
:keyword bool include_text_content:
Whether or not to include text elements such as lines and words in addition to form fields.
Expand All @@ -349,7 +349,7 @@ def analyze_callback(raw_response, _, headers): # pylint: disable=unused-argume

deserialization_callback = cls if cls else analyze_callback
return await self._client.analyze_with_custom_model( # type: ignore
file_stream={"source": url},
file_stream={"source": form_url},
model_id=model_id,
include_text_details=include_text_content,
cls=deserialization_callback,
Expand Down
Loading