Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: [google-cloud-documentai] A new message FoundationModelTuningOptions is added #12319

Merged
merged 2 commits into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1963,7 +1963,9 @@ async def sample_create_processor():
processor (:class:`google.cloud.documentai_v1beta3.types.Processor`):
Required. The processor to be created, requires
[Processor.type][google.cloud.documentai.v1beta3.Processor.type]
and [Processor.display_name]][] to be set. Also, the
and
[Processor.display_name][google.cloud.documentai.v1beta3.Processor.display_name]
to be set. Also, the
[Processor.kms_key_name][google.cloud.documentai.v1beta3.Processor.kms_key_name]
field must be set if the processor is under CMEK.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2443,7 +2443,9 @@ def sample_create_processor():
processor (google.cloud.documentai_v1beta3.types.Processor):
Required. The processor to be created, requires
[Processor.type][google.cloud.documentai.v1beta3.Processor.type]
and [Processor.display_name]][] to be set. Also, the
and
[Processor.display_name][google.cloud.documentai.v1beta3.Processor.display_name]
to be set. Also, the
[Processor.kms_key_name][google.cloud.documentai.v1beta3.Processor.kms_key_name]
field must be set if the processor is under CMEK.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ class Dataset(proto.Message):
This field is a member of `oneof`_ ``storage_source``.
document_warehouse_config (google.cloud.documentai_v1beta3.types.Dataset.DocumentWarehouseConfig):
Optional. Document AI Warehouse-based dataset
configuration.
Optional. Deprecated. Warehouse-based dataset
configuration is not supported.
This field is a member of `oneof`_ ``storage_source``.
unmanaged_dataset_config (google.cloud.documentai_v1beta3.types.Dataset.UnmanagedDatasetConfig):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1491,8 +1491,10 @@ class PageRef(proto.Message):
[PageRef.bounding_poly][google.cloud.documentai.v1beta3.Document.PageAnchor.PageRef.bounding_poly]
instead.
bounding_poly (google.cloud.documentai_v1beta3.types.BoundingPoly):
Optional. Identifies the bounding polygon of
a layout element on the page.
Optional. Identifies the bounding polygon of a layout
element on the page. If ``layout_type`` is set, the bounding
polygon must be exactly the same as the layout element it's
referring to.
confidence (float):
Optional. Confidence of detected page element, if
applicable. Range ``[0, 1]``.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,9 @@ class ProcessOptions(proto.Message):

This field is a member of `oneof`_ ``page_range``.
ocr_config (google.cloud.documentai_v1beta3.types.OcrConfig):
Only applicable to ``OCR_PROCESSOR``. Returns error if set
on other processor types.
Only applicable to ``OCR_PROCESSOR`` and
``FORM_PARSER_PROCESSOR``. Returns error if set on other
processor types.
schema_override (google.cloud.documentai_v1beta3.types.DocumentSchema):
Optional. Override the schema of the
[ProcessorVersion][google.cloud.documentai.v1beta3.ProcessorVersion].
Expand Down Expand Up @@ -223,6 +224,15 @@ class ProcessRequest(proto.Message):
``pages.{page_field_name}``.
process_options (google.cloud.documentai_v1beta3.types.ProcessOptions):
Inference-time options for the process API
labels (MutableMapping[str, str]):
Optional. The labels with user-defined
metadata for the request.
Label keys and values can be no longer than 63
characters (Unicode codepoints) and can only
contain lowercase letters, numeric characters,
underscores, and dashes. International
characters are allowed. Label values are
optional. Label keys must start with a letter.
"""

inline_document: gcd_document.Document = proto.Field(
Expand Down Expand Up @@ -266,6 +276,11 @@ class ProcessRequest(proto.Message):
number=7,
message="ProcessOptions",
)
labels: MutableMapping[str, str] = proto.MapField(
proto.STRING,
proto.STRING,
number=10,
)


class HumanReviewStatus(proto.Message):
Expand Down Expand Up @@ -398,6 +413,15 @@ class BatchProcessRequest(proto.Message):
Default to ``false``.
process_options (google.cloud.documentai_v1beta3.types.ProcessOptions):
Inference-time options for the process API
labels (MutableMapping[str, str]):
Optional. The labels with user-defined
metadata for the request.
Label keys and values can be no longer than 63
characters (Unicode codepoints) and can only
contain lowercase letters, numeric characters,
underscores, and dashes. International
characters are allowed. Label values are
optional. Label keys must start with a letter.
"""

class BatchInputConfig(proto.Message):
Expand Down Expand Up @@ -476,6 +500,11 @@ class BatchOutputConfig(proto.Message):
number=7,
message="ProcessOptions",
)
labels: MutableMapping[str, str] = proto.MapField(
proto.STRING,
proto.STRING,
number=9,
)


class BatchProcessResponse(proto.Message):
Expand Down Expand Up @@ -1022,7 +1051,9 @@ class CreateProcessorRequest(proto.Message):
processor (google.cloud.documentai_v1beta3.types.Processor):
Required. The processor to be created, requires
[Processor.type][google.cloud.documentai.v1beta3.Processor.type]
and [Processor.display_name]][] to be set. Also, the
and
[Processor.display_name][google.cloud.documentai.v1beta3.Processor.display_name]
to be set. Also, the
[Processor.kms_key_name][google.cloud.documentai.v1beta3.Processor.kms_key_name]
field must be set if the processor is under CMEK.
"""
Expand Down Expand Up @@ -1217,6 +1248,10 @@ class TrainProcessorVersionRequest(proto.Message):
[TrainProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.TrainProcessorVersion]
method.

This message has `oneof`_ fields (mutually exclusive fields).
For each oneof, at most one member field can be set at the same time.
Setting any member of the oneof automatically clears all other
members.

.. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields

Expand All @@ -1225,6 +1260,11 @@ class TrainProcessorVersionRequest(proto.Message):
Options to control Custom Document Extraction
(CDE) Processor.

This field is a member of `oneof`_ ``processor_flags``.
foundation_model_tuning_options (google.cloud.documentai_v1beta3.types.TrainProcessorVersionRequest.FoundationModelTuningOptions):
Options to control foundation model tuning of
a processor.

This field is a member of `oneof`_ ``processor_flags``.
parent (str):
Required. The parent (project, location and processor) to
Expand Down Expand Up @@ -1301,12 +1341,43 @@ class TrainingMethod(proto.Enum):
enum="TrainProcessorVersionRequest.CustomDocumentExtractionOptions.TrainingMethod",
)

class FoundationModelTuningOptions(proto.Message):
r"""Options to control foundation model tuning of the processor.

Attributes:
train_steps (int):
Optional. The number of steps to run for
model tuning. Valid values are between 1 and
400. If not provided, recommended steps will be
used.
learning_rate_multiplier (float):
Optional. The multiplier to apply to the
recommended learning rate. Valid values are
between 0.1 and 10. If not provided, recommended
learning rate will be used.
"""

train_steps: int = proto.Field(
proto.INT32,
number=2,
)
learning_rate_multiplier: float = proto.Field(
proto.FLOAT,
number=3,
)

custom_document_extraction_options: CustomDocumentExtractionOptions = proto.Field(
proto.MESSAGE,
number=5,
oneof="processor_flags",
message=CustomDocumentExtractionOptions,
)
foundation_model_tuning_options: FoundationModelTuningOptions = proto.Field(
proto.MESSAGE,
number=12,
oneof="processor_flags",
message=FoundationModelTuningOptions,
)
parent: str = proto.Field(
proto.STRING,
number=1,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,8 @@ class Property(proto.Message):
name (str):
The name of the property. Follows the same
guidelines as the EntityType name.
display_name (str):
User defined name for the property.
value_type (str):
A reference to the value type of the property. This type is
subject to the same conventions as the ``Entity.base_types``
Expand All @@ -235,14 +237,14 @@ class Property(proto.Message):

class OccurrenceType(proto.Enum):
r"""Types of occurrences of the entity type in the document. This
represents the number of instances of instances of an entity, not
number of mentions of an entity. For example, a bank statement may
only have one ``account_number``, but this account number may be
mentioned in several places on the document. In this case the
'account_number' would be considered a ``REQUIRED_ONCE`` entity
type. If, on the other hand, we expect a bank statement to contain
the status of multiple different accounts for the customers, the
occurrence type will be set to ``REQUIRED_MULTIPLE``.
represents the number of instances, not mentions, of an entity. For
example, a bank statement might only have one ``account_number``,
but this account number can be mentioned in several places on the
document. In this case, the ``account_number`` is considered a
``REQUIRED_ONCE`` entity type. If, on the other hand, we expect a
bank statement to contain the status of multiple different accounts
for the customers, the occurrence type is set to
``REQUIRED_MULTIPLE``.

Values:
OCCURRENCE_TYPE_UNSPECIFIED (0):
Expand Down Expand Up @@ -272,6 +274,10 @@ class OccurrenceType(proto.Enum):
proto.STRING,
number=1,
)
display_name: str = proto.Field(
proto.STRING,
number=6,
)
value_type: str = proto.Field(
proto.STRING,
number=2,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,17 @@ class DatasetSplitType(proto.Enum):


class DocumentLabelingState(proto.Enum):
r"""Describes the labelling status of a document.
r"""Describes the labeling status of a document.

Values:
DOCUMENT_LABELING_STATE_UNSPECIFIED (0):
Default value if the enum is not set.
DOCUMENT_LABELED (1):
Document has been labelled.
Document has been labeled.
DOCUMENT_UNLABELED (2):
Document has not been labelled.
Document has not been labeled.
DOCUMENT_AUTO_LABELED (3):
Document has been auto-labelled.
Document has been auto-labeled.
"""
DOCUMENT_LABELING_STATE_UNSPECIFIED = 0
DOCUMENT_LABELED = 1
Expand Down Expand Up @@ -417,9 +417,9 @@ class ListDocumentsRequest(proto.Message):
- String match is case sensitive (for filter
``DisplayName`` & ``EntityType``).
return_total_size (bool):
Optional. Controls if the ListDocuments request requires a
total size of matched documents. See
ListDocumentsResponse.total_size.
Optional. Controls if the request requires a total size of
matched documents. See
[ListDocumentsResponse.total_size][google.cloud.documentai.v1beta3.ListDocumentsResponse.total_size].

Enabling this flag may adversely impact performance.

Expand All @@ -428,11 +428,13 @@ class ListDocumentsRequest(proto.Message):
Optional. Number of results to skip beginning from the
``page_token`` if provided.
https://google.aip.dev/158#skipping-results. It must be a
non-negative integer. Negative values wil be rejected. Note
non-negative integer. Negative values will be rejected. Note
that this is not the number of pages to skip. If this value
causes the cursor to move past the end of results,
``ListDocumentsResponse.document_metadata`` and
``ListDocumentsResponse.next_page_token`` will be empty.
[ListDocumentsResponse.document_metadata][google.cloud.documentai.v1beta3.ListDocumentsResponse.document_metadata]
and
[ListDocumentsResponse.next_page_token][google.cloud.documentai.v1beta3.ListDocumentsResponse.next_page_token]
will be empty.
"""

dataset: str = proto.Field(
Expand Down Expand Up @@ -469,9 +471,10 @@ class ListDocumentsResponse(proto.Message):
Document metadata corresponding to the listed
documents.
next_page_token (str):
A token, which can be sent as ``page_token`` to retrieve the
next page. If this field is omitted, there are no subsequent
pages.
A token, which can be sent as
[ListDocumentsRequest.page_token][google.cloud.documentai.v1beta3.ListDocumentsRequest.page_token]
to retrieve the next page. If this field is omitted, there
are no subsequent pages.
total_size (int):
Total count of documents queried.
"""
Expand Down Expand Up @@ -671,7 +674,7 @@ class DocumentMetadata(proto.Message):
Type of the dataset split to which the
document belongs.
labeling_state (google.cloud.documentai_v1beta3.types.DocumentLabelingState):
Labelling state of the document.
Labeling state of the document.
display_name (str):
The display name of the document.
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class documentaiCallTransformer(cst.CSTTransformer):
CTRL_PARAMS: Tuple[str] = ('retry', 'timeout', 'metadata')
METHOD_TO_PARAMS: Dict[str, Tuple[str]] = {
'batch_delete_documents': ('dataset', 'dataset_documents', ),
'batch_process_documents': ('name', 'input_configs', 'output_config', 'input_documents', 'document_output_config', 'skip_human_review', 'process_options', ),
'batch_process_documents': ('name', 'input_configs', 'output_config', 'input_documents', 'document_output_config', 'skip_human_review', 'process_options', 'labels', ),
'create_processor': ('parent', 'processor', ),
'delete_processor': ('name', ),
'delete_processor_version': ('name', ),
Expand All @@ -62,10 +62,10 @@ class documentaiCallTransformer(cst.CSTTransformer):
'list_processors': ('parent', 'page_size', 'page_token', ),
'list_processor_types': ('parent', 'page_size', 'page_token', ),
'list_processor_versions': ('parent', 'page_size', 'page_token', ),
'process_document': ('name', 'inline_document', 'raw_document', 'gcs_document', 'document', 'skip_human_review', 'field_mask', 'process_options', ),
'process_document': ('name', 'inline_document', 'raw_document', 'gcs_document', 'document', 'skip_human_review', 'field_mask', 'process_options', 'labels', ),
'review_document': ('human_review_config', 'inline_document', 'document', 'enable_schema_validation', 'priority', 'document_schema', ),
'set_default_processor_version': ('processor', 'default_processor_version', ),
'train_processor_version': ('parent', 'processor_version', 'custom_document_extraction_options', 'document_schema', 'input_data', 'base_processor_version', ),
'train_processor_version': ('parent', 'processor_version', 'custom_document_extraction_options', 'foundation_model_tuning_options', 'document_schema', 'input_data', 'base_processor_version', ),
'undeploy_processor_version': ('name', ),
'update_dataset': ('dataset', 'update_mask', ),
'update_dataset_schema': ('dataset_schema', 'update_mask', ),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4897,6 +4897,7 @@ def test_update_dataset_schema_rest(request_type):
"properties": [
{
"name": "name_value",
"display_name": "display_name_value",
"value_type": "value_type_value",
"occurrence_type": 1,
"property_metadata": {
Expand Down
Loading