Skip to content

Commit

Permalink
vision: move published samples into master (#2743)
Browse files Browse the repository at this point in the history
Add generated samples for Vision API
Add required attribute mime_type
Resolve encoding error in py2
Remove autogenerated warnings
Remove coding: utf-8 line
Remove argument encoding checks
Remove CLI
Remove unnecessary statics, variables, and imports
Blacken with l=88
Remove unused region tag and comments
Verify that there are no published links pointing to removed region tags
Shorten docstring
Replace concrete file path with "path/to/your/document.pdf"

Co-authored-by: Yu-Han Liu <dizcology@hotmail.com>
  • Loading branch information
texasmichelle and dizcology authored Jan 28, 2020
1 parent 899cee8 commit 1425a54
Show file tree
Hide file tree
Showing 6 changed files with 286 additions and 0 deletions.
51 changes: 51 additions & 0 deletions vision/cloud-client/detect/vision_async_batch_annotate_images.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# [START vision_async_batch_annotate_images]

from google.cloud import vision_v1
from google.cloud.vision_v1 import enums


def sample_async_batch_annotate_images(
    input_image_uri="gs://cloud-samples-data/vision/label/wakeupcat.jpg",
    output_uri="gs://your-bucket/prefix/",
):
    """Annotate one image asynchronously and report where the output landed.

    Builds a single request asking for label detection and image properties,
    submits it as an async batch operation, blocks until the operation
    finishes, and prints the GCS URI reported in the response.

    Args:
        input_image_uri: URI of the image to annotate.
        output_uri: GCS URI used as the prefix for the written output.
    """
    annotator = vision_v1.ImageAnnotatorClient()

    annotate_request = {
        "image": {"source": {"image_uri": input_image_uri}},
        "features": [
            {"type": enums.Feature.Type.LABEL_DETECTION},
            {"type": enums.Feature.Type.IMAGE_PROPERTIES},
        ],
    }
    destination_config = {
        "gcs_destination": {"uri": output_uri},
        # The max number of responses to output in each JSON file
        "batch_size": 2,
    }

    pending = annotator.async_batch_annotate_images(
        [annotate_request], destination_config
    )

    print("Waiting for operation to complete...")
    finished = pending.result()

    # The output is written to GCS with the provided output_uri as prefix
    written_prefix = finished.output_config.gcs_destination.uri
    print("Output written to GCS with prefix: {}".format(written_prefix))


# [END vision_async_batch_annotate_images]
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Copyright 2020 Google
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import uuid

from google.cloud import storage
import pytest

import vision_async_batch_annotate_images

# Local directory of test resources, resolved relative to this module.
# NOTE(review): not referenced by the test code visible in this file - confirm
# whether it is still needed.
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
# Base GCS URI that the test joins with the input image's relative path.
GCS_ROOT = "gs://cloud-samples-data/vision/"

# Bucket name read from the environment; a missing CLOUD_STORAGE_BUCKET variable
# raises KeyError when this module is imported.
BUCKET = os.environ["CLOUD_STORAGE_BUCKET"]
# Output prefix made unique per import by a random UUID.
OUTPUT_PREFIX = "TEST_OUTPUT_{}".format(uuid.uuid4())
# Destination URI handed to the sample as its output_uri argument.
GCS_DESTINATION_URI = "gs://{}/{}/".format(BUCKET, OUTPUT_PREFIX)


@pytest.fixture()
def storage_client():
    """Provide a Cloud Storage client to the requesting test or fixture."""
    gcs_client = storage.Client()
    yield gcs_client


@pytest.fixture()
def bucket(storage_client):
    """Yield the test bucket, deleting blobs under OUTPUT_PREFIX before and after.

    The pre-test cleanup is best-effort: a failure there does not abort the
    test, but it is reported as a warning instead of being silently dropped so
    that problems such as missing permissions are visible in the test output.
    The post-test cleanup is not guarded; errors there propagate.

    Args:
        storage_client: Client used to look up the bucket named by BUCKET.

    Yields:
        The bucket object returned by ``storage_client.get_bucket(BUCKET)``.
    """
    import warnings

    bucket = storage_client.get_bucket(BUCKET)

    try:
        for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX):
            blob.delete()
    except Exception as exc:
        # Keep the original best-effort semantics, but leave a trace of what
        # went wrong rather than swallowing the error.
        warnings.warn(
            "Could not pre-clean blobs under gs://{}/{}: {!r}".format(
                BUCKET, OUTPUT_PREFIX, exc
            )
        )

    yield bucket

    # Teardown: remove whatever the sample wrote under the test prefix.
    for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX):
        blob.delete()


def test_sample_asyn_batch_annotate_images(storage_client, bucket, capsys):
    """Run the async image sample and check that it reports and writes output."""
    vision_async_batch_annotate_images.sample_async_batch_annotate_images(
        input_image_uri=os.path.join(GCS_ROOT, "label/wakeupcat.jpg"),
        output_uri=GCS_DESTINATION_URI,
    )

    captured_stdout = capsys.readouterr()[0]
    assert "Output written to GCS" in captured_stdout

    # At least one blob must exist under the prefix the sample was given.
    written_blobs = list(bucket.list_blobs(prefix=OUTPUT_PREFIX))
    assert len(written_blobs) > 0
56 changes: 56 additions & 0 deletions vision/cloud-client/detect/vision_batch_annotate_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# [START vision_batch_annotate_files]

from google.cloud import vision_v1
from google.cloud.vision_v1 import enums
import io


def sample_batch_annotate_files(file_path="path/to/your/document.pdf"):
    """Run document text detection on a local file and print the results.

    The file's bytes are sent inline in the request. For every image response
    the full text is printed, followed by confidence values for each block,
    paragraph, word and symbol.

    Args:
        file_path: Path of the local document to read and annotate.
    """
    client = vision_v1.ImageAnnotatorClient()

    with io.open(file_path, "rb") as document:
        document_bytes = document.read()

    file_request = {
        # Supported mime_type: application/pdf, image/tiff, image/gif
        "input_config": {"mime_type": "application/pdf", "content": document_bytes},
        "features": [{"type": enums.Feature.Type.DOCUMENT_TEXT_DETECTION}],
        # The service can process up to 5 pages per document file. Here we
        # specify the first, second, and last page of the document to be
        # processed.
        "pages": [1, 2, -1],
    }

    batch_response = client.batch_annotate_files([file_request])

    # A single request was submitted; walk the image responses nested under
    # the first top-level response.
    for page_result in batch_response.responses[0].responses:
        annotation = page_result.full_text_annotation
        print(u"Full text: {}".format(annotation.text))
        for page in annotation.pages:
            for text_block in page.blocks:
                print(u"\nBlock confidence: {}".format(text_block.confidence))
                for paragraph in text_block.paragraphs:
                    print(u"\tParagraph confidence: {}".format(paragraph.confidence))
                    for word in paragraph.words:
                        print(u"\t\tWord confidence: {}".format(word.confidence))
                        for sym in word.symbols:
                            symbol_line = u"\t\t\tSymbol: {}, (confidence: {})".format(
                                sym.text, sym.confidence
                            )
                            print(symbol_line)


# [END vision_batch_annotate_files]
57 changes: 57 additions & 0 deletions vision/cloud-client/detect/vision_batch_annotate_files_gcs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# [START vision_batch_annotate_files_gcs]

from google.cloud import vision_v1
from google.cloud.vision_v1 import enums


def sample_batch_annotate_files(
    storage_uri="gs://cloud-samples-data/vision/document_understanding/kafka.pdf",
):
    """Run document text detection on a file stored in GCS and print the results.

    For every image response the full text is printed, followed by confidence
    values for each block, paragraph, word and symbol.

    Args:
        storage_uri: GCS URI of the document to annotate.
    """
    document_mime_type = "application/pdf"

    client = vision_v1.ImageAnnotatorClient()

    file_request = {
        "input_config": {
            "gcs_source": {"uri": storage_uri},
            "mime_type": document_mime_type,
        },
        "features": [{"type": enums.Feature.Type.DOCUMENT_TEXT_DETECTION}],
        # The service can process up to 5 pages per document file.
        # Here we specify the first, second, and last page of the document to
        # be processed.
        "pages": [1, 2, -1],
    }

    batch_response = client.batch_annotate_files([file_request])

    # A single request was submitted; walk the image responses nested under
    # the first top-level response.
    for page_result in batch_response.responses[0].responses:
        annotation = page_result.full_text_annotation
        print(u"Full text: {}".format(annotation.text))
        for page in annotation.pages:
            for text_block in page.blocks:
                print(u"\nBlock confidence: {}".format(text_block.confidence))
                for paragraph in text_block.paragraphs:
                    print(u"\tParagraph confidence: {}".format(paragraph.confidence))
                    for word in paragraph.words:
                        print(u"\t\tWord confidence: {}".format(word.confidence))
                        for sym in word.symbols:
                            symbol_line = u"\t\t\tSymbol: {}, (confidence: {})".format(
                                sym.text, sym.confidence
                            )
                            print(symbol_line)


# [END vision_batch_annotate_files_gcs]
30 changes: 30 additions & 0 deletions vision/cloud-client/detect/vision_batch_annotate_files_gcs_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright 2020 Google
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import vision_batch_annotate_files_gcs

# Base GCS URI that the test joins with the input document's relative path.
GCS_ROOT = "gs://cloud-samples-data/vision/"


def test_sample_batch_annotate_files_gcs(capsys):
    """Run the GCS batch-file sample and check for the expected printed headings."""
    vision_batch_annotate_files_gcs.sample_batch_annotate_files(
        storage_uri=os.path.join(GCS_ROOT, "document_understanding/kafka.pdf")
    )

    printed = capsys.readouterr()[0]

    for expected_heading in ("Full text", "Block confidence"):
        assert expected_heading in printed
30 changes: 30 additions & 0 deletions vision/cloud-client/detect/vision_batch_annotate_files_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright 2020 Google
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import vision_batch_annotate_files

# Local directory of test resources (the test reads kafka.pdf from it),
# resolved relative to this module.
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")


def test_sample_batch_annotate_files(capsys):
    """Run the local batch-file sample and check for the expected printed headings."""
    local_pdf = os.path.join(RESOURCES, "kafka.pdf")
    vision_batch_annotate_files.sample_batch_annotate_files(file_path=local_pdf)

    printed = capsys.readouterr()[0]

    for expected_heading in ("Full text", "Block confidence"):
        assert expected_heading in printed

0 comments on commit 1425a54

Please sign in to comment.