Skip to content

Commit 2055433

Browse files
committed
Add suggestion from review and remove extra tests not relevant to this update
1 parent 6d5c255 commit 2055433

File tree

2 files changed

+20 -68 lines changed

2 files changed

+20 -68 lines changed

google/cloud/documentai_toolbox/utilities/gcs_utilities.py

Lines changed: 4 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@
1414
# limitations under the License.
1515
#
1616
"""Google Cloud Storage utilities."""
17-
import importlib.metadata
1817
import os
1918
import re
2019
from typing import Dict, List, Optional, Tuple
@@ -143,18 +142,11 @@ def get_blob(
143142
if not re.match(constants.FILE_CHECK_REGEX, gcs_uri):
144143
raise ValueError("gcs_uri must link to a single file.")
145144

146-
try:
147-
version = importlib.metadata.version("google-cloud-storage")
148-
except importlib.metadata.PackageNotFoundError:
149-
raise ImportError("google-cloud-storage is not installed.")
145+
# google-cloud-storage >= 3.0.0
146+
if hasattr(storage.Blob, "from_uri"):
147+
return storage.Blob.from_uri(gcs_uri, _get_storage_client(module=module))
150148

151-
client = _get_storage_client(module=module)
152-
153-
major, _, _ = map(int, version.split("."))
154-
if major < 3:
155-
return storage.Blob.from_string(gcs_uri, client)
156-
else:
157-
return storage.Blob.from_uri(gcs_uri, client)
149+
return storage.Blob.from_string(gcs_uri, _get_storage_client(module=module))
158150

159151

160152
def split_gcs_uri(gcs_uri: str) -> Tuple[str, str]:

tests/unit/test_gcs_utilities.py

Lines changed: 16 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -598,72 +598,32 @@ def test_get_blobs_with_file_type_error():
598598
gcs_utilities.get_blobs(gcs_bucket_name="test-bucket", gcs_prefix="test.json")
599599

600600

601-
@mock.patch("google.cloud.documentai_toolbox.utilities.gcs_utilities.storage")
602-
def test_get_blob_success_major_3(mock_storage):
603-
mock_version = "3.0.0"
604-
with mock.patch("importlib.metadata.version", return_value=mock_version):
605-
client = mock_storage.Client.return_value
606-
gcs_uri = "gs://test-bucket/test.json"
607-
608-
gcs_utilities.get_blob(gcs_uri)
609-
610-
mock_storage.Blob.from_uri.assert_called_once_with(gcs_uri, client)
601+
def test_get_blob_invalid_uri():
602+
with pytest.raises(ValueError, match="gcs_uri must link to a single file."):
603+
gcs_utilities.get_blob("gs://test-bucket/prefix/")
611604

612605

613606
@mock.patch("google.cloud.documentai_toolbox.utilities.gcs_utilities.storage")
614-
def test_get_blob_success_major_2(mock_storage):
615-
mock_version = "2.0.0"
616-
with mock.patch("importlib.metadata.version", return_value=mock_version):
617-
client = mock_storage.Client.return_value
618-
gcs_uri = "gs://test-bucket/test.json"
619-
620-
gcs_utilities.get_blob(gcs_uri)
607+
def test_get_blob_from_uri(mock_storage):
608+
gcs_uri = "gs://test-bucket/test.json"
621609

622-
mock_storage.Blob.from_string.assert_called_once_with(gcs_uri, client)
610+
# Mock storage.Blob.from_uri to exist
611+
mock_storage.Blob.from_uri.return_value = mock.Mock(spec=storage.blob.Blob)
623612

613+
gcs_utilities.get_blob(gcs_uri=gcs_uri)
624614

625-
def test_get_blob_invalid_uri():
626-
with pytest.raises(ValueError, match="gcs_uri must link to a single file."):
627-
gcs_utilities.get_blob("gs://test-bucket/prefix/")
628-
629-
630-
def test_get_blob_import_error():
631-
with mock.patch(
632-
"importlib.metadata.version",
633-
side_effect=importlib.metadata.PackageNotFoundError,
634-
):
635-
with pytest.raises(ImportError, match="google-cloud-storage is not installed."):
636-
gcs_utilities.get_blob("gs://test-bucket/test.json")
615+
mock_storage.Blob.from_uri.assert_called_once()
637616

638617

639618
@mock.patch("google.cloud.documentai_toolbox.utilities.gcs_utilities.storage")
640-
def test_print_gcs_document_tree_with_skipping_files(mock_storage, capfd):
641-
client = mock_storage.Client.return_value
642-
mock_bucket = mock.Mock()
643-
client.Bucket.return_value = mock_bucket
619+
def test_get_blob_from_string(mock_storage):
620+
gcs_uri = "gs://test-bucket/test.json"
644621

645-
blobs = [
646-
storage.Blob(
647-
name=f"gs://test-directory/1/test_shard{i}.json",
648-
bucket="gs://test-directory/1",
649-
)
650-
for i in range(1, 11)
651-
]
622+
# Mock storage.Blob to NOT have from_uri
623+
del mock_storage.Blob.from_uri
652624

653-
client.list_blobs.return_value = blobs
625+
mock_storage.Blob.from_string.return_value = mock.Mock(spec=storage.blob.Blob)
654626

655-
# files_to_display = 2. 10 files total.
656-
# idx 0, 1, 2 -> print
657-
# idx 3, 4, 5, 6, 7, 8 -> skip
658-
# idx 9 -> print last
659-
gcs_utilities.print_gcs_document_tree(
660-
gcs_bucket_name="test-directory", gcs_prefix="/", files_to_display=2
661-
)
627+
gcs_utilities.get_blob(gcs_uri=gcs_uri)
662628

663-
out, err = capfd.readouterr()
664-
assert "├──test_shard1.json" in out
665-
assert "├──test_shard2.json" in out
666-
assert "├──test_shard3.json" in out
667-
assert "├──test_shard4.json" not in out
668-
assert "│ ...." in out
669-
assert "└──test_shard10.json" in out
629+
mock_storage.Blob.from_string.assert_called_once()

0 commit comments

Comments (0)