Skip to content

Commit

Permalink
[formrecognizer] docs edits (Azure#21030)
Browse files Browse the repository at this point in the history
* readme edits

* fix some docstrings

* fix asyncitempaged docstring type

* samples edits

* remove unimported helper method from migration guide

* update link on build model
  • Loading branch information
kristapratico authored Oct 4, 2021
1 parent 9922c0b commit dc22490
Show file tree
Hide file tree
Showing 11 changed files with 40 additions and 36 deletions.
30 changes: 15 additions & 15 deletions sdk/formrecognizer/azure-ai-formrecognizer/MIGRATION_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -249,21 +249,21 @@ for idx, content in enumerate(form_pages):
))
for table_idx, table in enumerate(content.tables):
print("Table # {} has {} rows and {} columns".format(table_idx, table.row_count, table.column_count))
print("Table # {} location on page: {}".format(table_idx, format_bounding_box(table.bounding_box)))
print("Table # {} location on page: {}".format(table_idx, table.bounding_box))
for cell in table.cells:
print("...Cell[{}][{}] has text '{}' within bounding box '{}'".format(
cell.row_index,
cell.column_index,
cell.text,
format_bounding_box(cell.bounding_box)
cell.bounding_box
))

for line_idx, line in enumerate(content.lines):
print("Line # {} has word count '{}' and text '{}' within bounding box '{}'".format(
line_idx,
len(line.words),
line.text,
format_bounding_box(line.bounding_box)
line.bounding_box
))
if line.appearance:
if line.appearance.style_name == "handwriting" and line.appearance.style_confidence > 0.8:
Expand All @@ -274,7 +274,7 @@ for idx, content in enumerate(form_pages):
for selection_mark in content.selection_marks:
print("Selection mark is '{}' within bounding box '{}' and has a confidence of {}".format(
selection_mark.state,
format_bounding_box(selection_mark.bounding_box),
selection_mark.bounding_box,
selection_mark.confidence
))
print("----------------------------------------")
Expand Down Expand Up @@ -309,7 +309,7 @@ for idx, page in enumerate(result.pages):
"Line # {} has text content '{}' within bounding box '{}'".format(
line_idx,
line.content,
format_bounding_box(line.bounding_box),
line.bounding_box,
)
)

Expand All @@ -324,7 +324,7 @@ for idx, page in enumerate(result.pages):
print(
"Selection mark is '{}' within bounding box '{}' and has a confidence of {}".format(
selection_mark.state,
format_bounding_box(selection_mark.bounding_box),
selection_mark.bounding_box,
selection_mark.confidence,
)
)
Expand All @@ -340,7 +340,7 @@ for table_idx, table in enumerate(result.tables):
"Table # {} location on page: {} is {}".format(
table_idx,
region.page_number,
format_bounding_box(region.bounding_box),
region.bounding_box,
)
)
for cell in table.cells:
Expand All @@ -355,7 +355,7 @@ for table_idx, table in enumerate(result.tables):
print(
"...content on page {} is within bounding box '{}'".format(
region.page_number,
format_bounding_box(region.bounding_box),
region.bounding_box,
)
)

Expand Down Expand Up @@ -393,7 +393,7 @@ for page in result.pages:
"...Line # {} has text content '{}' within bounding box '{}'".format(
line_idx,
line.content,
format_bounding_box(line.bounding_box),
line.bounding_box,
)
)

Expand All @@ -408,7 +408,7 @@ for page in result.pages:
print(
"...Selection mark is '{}' within bounding box '{}' and has a confidence of {}".format(
selection_mark.state,
format_bounding_box(selection_mark.bounding_box),
selection_mark.bounding_box,
selection_mark.confidence,
)
)
Expand All @@ -424,7 +424,7 @@ for table_idx, table in enumerate(result.tables):
"Table # {} location on page: {} is {}".format(
table_idx,
region.page_number,
format_bounding_box(region.bounding_box),
region.bounding_box,
)
)
for cell in table.cells:
Expand All @@ -439,15 +439,15 @@ for table_idx, table in enumerate(result.tables):
print(
"...content on page {} is within bounding box '{}'\n".format(
region.page_number,
format_bounding_box(region.bounding_box),
region.bounding_box,
)
)

print("----Entities found in document----")
for entity in result.entities:
print("Entity of category '{}' with sub-category '{}'".format(entity.category, entity.sub_category))
print("...has content '{}'".format(entity.content))
print("...within '{}' bounding regions".format(format_bounding_region(entity.bounding_regions)))
print("...within '{}' bounding regions".format(entity.bounding_regions))
print("...with confidence {}\n".format(entity.confidence))

print("----Key-value pairs found in document----")
Expand All @@ -456,14 +456,14 @@ for kv_pair in result.key_value_pairs:
print(
"Key '{}' found within '{}' bounding regions".format(
kv_pair.key.content,
format_bounding_region(kv_pair.key.bounding_regions),
kv_pair.key.bounding_regions,
)
)
if kv_pair.value:
print(
"Value '{}' found within '{}' bounding regions\n".format(
kv_pair.value.content,
format_bounding_region(kv_pair.value.bounding_regions),
kv_pair.value.bounding_regions,
)
)
print("----------------------------------------")
Expand Down
6 changes: 3 additions & 3 deletions sdk/formrecognizer/azure-ai-formrecognizer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ Sample code snippets are provided to illustrate using long-running operations [b

The following section provides several code snippets covering some of the most common Form Recognizer tasks, including:

* [Extract layout](#extract-layout "Extract Layout")
* [Extract Layout](#extract-layout "Extract Layout")
* [Using Prebuilt Models](#using-prebuilt-models "Using Prebuilt Models")
* [Using Prebuilt Document](#using-prebuilt-document "Using Prebuilt Document")
* [Build a Model](#build-a-model "Build a Model")
Expand Down Expand Up @@ -276,7 +276,7 @@ for table_idx, table in enumerate(result.tables):
### Using Prebuilt Models
Extract fields from select document types such as receipts, invoices, business cards, and identity documents using prebuilt models provided by the Form Recognizer service.

For example, to analyze fields from a sales receipt, use the prebuilt receipt model provided by passing `model="prebuilt-receipt"` into the `begin_analyze_documents` method:
For example, to analyze fields from a sales receipt, use the prebuilt receipt model provided by passing `model="prebuilt-receipt"` into the `begin_analyze_document` method:

```python
from azure.ai.formrecognizer import DocumentAnalysisClient
Expand Down Expand Up @@ -314,7 +314,7 @@ You are not limited to receipts! There are a few prebuilt models to choose from,

### Using Prebuilt Document
Analyze entities, key-value pairs, tables, styles, and selection marks from documents using the general prebuilt document model provided by the Form Recognizer service.
Select the Prebuilt Document model by passing `model="prebuilt-document"` into the `begin_analyze_documents` method:
Select the Prebuilt Document model by passing `model="prebuilt-document"` into the `begin_analyze_document` method:

```python
from azure.ai.formrecognizer import DocumentAnalysisClient
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def begin_build_model(self, source, **kwargs):
:param str source: An Azure Storage blob container's SAS URI. A container URI (without SAS)
can be used if the container is public. For more information on setting up a training data set, see:
https://docs.microsoft.com/azure/cognitive-services/form-recognizer/build-training-data-set
https://aka.ms/azsdk/formrecognizer/buildtrainingset
:keyword str model_id: A unique ID for your model. If not specified, a model ID will be created for you.
:keyword str description: An optional description to add to the model.
:keyword str prefix: A case-sensitive prefix string to filter documents in the source path.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2066,7 +2066,7 @@ def from_dict(cls, data):


class BoundingRegion(object):
"""The bounding box corresponding to a page.
"""The bounding region corresponding to a page.
:ivar list[~azure.ai.formrecognizer.Point] bounding_box:
A list of 4 points representing the quadrilateral bounding box
Expand Down Expand Up @@ -2132,7 +2132,9 @@ class DocumentElement(object):
:vartype content: str
:ivar bounding_box: Bounding box of the document element.
:vartype bounding_box: list[Point]
:ivar str kind:
:ivar str kind: The kind of document element. Possible kinds are "word" or "selectionMark" which
correspond to a :class:`~azure.ai.formrecognizer.DocumentWord` or
:class:`~azure.ai.formrecognizer.DocumentSelectionMark`, respectively.
"""

def __init__(self, **kwargs):
Expand Down Expand Up @@ -2857,7 +2859,7 @@ class DocumentSelectionMark(DocumentElement):
:vartype span: ~azure.ai.formrecognizer.DocumentSpan
:ivar confidence: Confidence of correctly extracting the selection mark.
:vartype confidence: float
:ivar str kind:
:ivar str kind: For DocumentSelectionMark, this is "selectionMark".
"""

def __init__(self, **kwargs):
Expand Down Expand Up @@ -3418,7 +3420,7 @@ class DocumentWord(DocumentElement):
:vartype span: ~azure.ai.formrecognizer.DocumentSpan
:ivar confidence: Confidence of correctly extracting the word.
:vartype confidence: float
:ivar str kind:
:ivar str kind: For DocumentWord, this is "word".
"""

def __init__(self, **kwargs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ def parse_operation_id(location):


class DocumentModelAdministrationLROPoller(LROPoller[PollingReturnType]):
"""Custom poller for model build operations.
"""Custom poller for model build operations. Call `result()` on the poller to return
a :class:`~azure.ai.formrecognizer.DocumentModel`.
.. versionadded:: v2021-09-30-preview
The *DocumentModelAdministrationLROPoller* poller object
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@


class AsyncDocumentModelAdministrationLROPoller(AsyncLROPoller[PollingReturnType]):
"""Custom poller for model build operations.
"""Custom poller for model build operations. Call `result()` on the poller to return
a :class:`~azure.ai.formrecognizer.DocumentModel`.
.. versionadded:: v2021-09-30-preview
The *AsyncDocumentModelAdministrationLROPoller* poller object
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ async def begin_build_model(
:param str source: An Azure Storage blob container's SAS URI. A container URI (without SAS)
can be used if the container is public. For more information on setting up a training data set, see:
https://docs.microsoft.com/azure/cognitive-services/form-recognizer/build-training-data-set
https://aka.ms/azsdk/formrecognizer/buildtrainingset
:keyword str model_id: A unique ID for your model. If not specified, a model ID will be created for you.
:keyword str description: An optional description to add to the model.
:keyword str prefix: A case-sensitive prefix string to filter documents in the source path.
Expand Down Expand Up @@ -350,7 +350,7 @@ def list_models(self, **kwargs: Any) -> AsyncItemPaged[DocumentModelInfo]:
description, and when it was created.
:return: Pageable of DocumentModelInfo.
:rtype: ~azure.core.paging.async_paging.AsyncItemPaged[DocumentModelInfo]
:rtype: ~azure.core.async_paging.AsyncItemPaged[DocumentModelInfo]
:raises ~azure.core.exceptions.HttpResponseError:
.. admonition:: Example:
Expand Down Expand Up @@ -434,7 +434,7 @@ def list_operations(self, **kwargs: Any) -> AsyncItemPaged[ModelOperationInfo]:
the document model can be accessed using the :func:`~get_model` or :func:`~list_models` APIs.
:return: A pageable of ModelOperationInfo.
:rtype: ~azure.core.paging.async_paging.AsyncItemPaged[ModelOperationInfo]
:rtype: ~azure.core.async_paging.AsyncItemPaged[ModelOperationInfo]
:raises ~azure.core.exceptions.HttpResponseError:
.. admonition:: Example:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
This sample demonstrates how to extract text, selection marks, and layout information from a document
given through a file.
Note that selection marks returned from begin_analyze_document() do not return the text associated with
the checkbox. For the API to return this information, build a custom model to analyze the checkbox and its text.
See sample_build_model_async.py for more information.
Note that selection marks returned from begin_analyze_document(model="prebuilt-layout") do not return the text
associated with the checkbox. For the API to return this information, build a custom model to analyze the
checkbox and its text. See sample_build_model_async.py for more information.
USAGE:
python sample_analyze_layout_async.py
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
documents found in https://aka.ms/azsdk/formrecognizer/sampletrainingfiles
More details on setting up a container and required file structure can be found here:
https://docs.microsoft.com/azure/cognitive-services/form-recognizer/build-training-data-set
https://aka.ms/azsdk/formrecognizer/buildtrainingset
USAGE:
python sample_build_model_async.py
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
This sample demonstrates how to extract text, selection marks, and layout information from a document
given through a file.
Note that selection marks returned from begin_analyze_document() do not return the text associated with
the checkbox. For the API to return this information, build a custom model to analyze the checkbox and its text.
See sample_build_model.py for more information.
Note that selection marks returned from begin_analyze_document(model="prebuilt-layout") do not return the text
associated with the checkbox. For the API to return this information, build a custom model to analyze the
checkbox and its text. See sample_build_model.py for more information.
USAGE:
python sample_analyze_layout.py
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
documents found in https://aka.ms/azsdk/formrecognizer/sampletrainingfiles
More details on setting up a container and required file structure can be found here:
https://docs.microsoft.com/azure/cognitive-services/form-recognizer/build-training-data-set
https://aka.ms/azsdk/formrecognizer/buildtrainingset
USAGE:
python sample_build_model.py
Expand Down

0 comments on commit dc22490

Please sign in to comment.