Skip to content

Commit 778d11c

Browse files
authored
feat: Update Toolbox for OCR 2.0 features (#171)
* Add Symbol to Page wrapper
  - Refactored classes to use a base class, which greatly simplified the file.
* Add MathFormula
* Update Quickstart sample and inline samples for creation methods
* Update tests to improve coverage
* Simplify code for `from_document_path()`
* Fix import statement order
* Update based on review comments
* Add class attributes to docstring
* Add clarification in docstring about Symbols
1 parent cba079c commit 778d11c

File tree

6 files changed

+33830
-114
lines changed

6 files changed

+33830
-114
lines changed

packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/document.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -390,19 +390,19 @@ def from_document_path(
390390
Document:
391391
A document from local `document_path`.
392392
"""
393-
document_paths = [document_path]
393+
document_paths = (
394+
glob.glob(os.path.join(document_path, f"*{constants.JSON_EXTENSION}"))
395+
if os.path.isdir(document_path)
396+
else [document_path]
397+
)
394398

395-
if os.path.isdir(document_path):
396-
document_paths = glob.glob(
397-
os.path.join(document_path, f"*{constants.JSON_EXTENSION}")
399+
documents = [
400+
documentai.Document.from_json(
401+
open(file_path, "r", encoding="utf-8").read(),
402+
ignore_unknown_fields=True,
398403
)
399-
400-
documents = []
401-
for file_path in document_paths:
402-
with open(file_path, "r", encoding="utf-8") as f:
403-
documents.append(
404-
documentai.Document.from_json(f.read(), ignore_unknown_fields=True)
405-
)
404+
for file_path in document_paths
405+
]
406406

407407
return cls(shards=documents)
408408

@@ -474,10 +474,12 @@ def from_batch_process_metadata(
474474
.. code-block:: python
475475
476476
from google.cloud import documentai
477+
from google.cloud.documentai_toolbox import document
477478
478479
operation = client.batch_process_documents(request)
479480
operation.result(timeout=timeout)
480481
metadata = documentai.BatchProcessMetadata(operation.metadata)
482+
wrapped_document = document.Document.from_batch_process_metadata(metadata)
481483
482484
Args:
483485
metadata (documentai.BatchProcessMetadata):
@@ -507,9 +509,11 @@ def from_batch_process_operation(
507509
.. code-block:: python
508510
509511
from google.cloud import documentai
512+
from google.cloud.documentai_toolbox import document
510513
511514
operation = client.batch_process_documents(request)
512515
operation_name = operation.operation.name
516+
wrapped_document = document.Document.from_batch_process_operation(operation_name)
513517
514518
Args:
515519
location (str):

0 commit comments

Comments
 (0)