# [START translate_v3_batch_translate_text_with_model]
from google.cloud import translate


def batch_translate_text_with_model(
    input_uri="gs://YOUR_BUCKET_ID/path/to/your/file.txt",
    output_uri="gs://YOUR_BUCKET_ID/path/to/save/results/",
    project_id="YOUR_PROJECT_ID",
    model_id="YOUR_MODEL_ID",
    timeout=None,
):
    """Batch translate a Cloud Storage text file using a translation model.

    The model can be an AutoML model or the general (built-in) model.
    The input is submitted as "text/plain", translated from English ("en")
    to Japanese ("ja"), and the character counts reported by the finished
    operation are printed.

    Args:
        input_uri: Cloud Storage URI of the file to translate.
        output_uri: Cloud Storage URI prefix the results are written under.
        project_id: Google Cloud project ID.
        model_id: ID of the model, looked up in the "us-central1" location.
        timeout: Value passed to ``operation.result`` to bound the wait for
            the long-running operation. ``None`` (the default) calls
            ``operation.result()`` without an argument, as before.
    """
    client = translate.TranslationServiceClient()

    location = "us-central1"
    parent = client.location_path(project_id, location)

    # Supported file types: https://cloud.google.com/translate/docs/supported-formats
    gcs_source = {"input_uri": input_uri}
    input_configs_element = {
        "gcs_source": gcs_source,
        "mime_type": "text/plain",  # Can be "text/plain" or "text/html".
    }
    gcs_destination = {"output_uri_prefix": output_uri}
    output_config = {"gcs_destination": gcs_destination}

    model_path = "projects/{}/locations/{}/models/{}".format(
        project_id, location, model_id  # The location of AutoML model.
    )

    # Supported language codes: https://cloud.google.com/translate/docs/languages
    models = {"ja": model_path}  # takes a target lang as key.

    operation = client.batch_translate_text(
        parent=parent,
        source_language_code="en",
        target_language_codes=["ja"],  # Up to 10 language codes here.
        input_configs=[input_configs_element],
        output_config=output_config,
        models=models,
    )

    print(u"Waiting for operation to complete...")
    # Call result() with no argument when no timeout was requested so the
    # client library's own default wait behaviour is left untouched.
    if timeout is None:
        response = operation.result()
    else:
        response = operation.result(timeout)

    # Report how many characters were submitted and how many were translated.
    print(u"Total Characters: {}".format(response.total_characters))
    print(u"Translated Characters: {}".format(response.translated_characters))


# [END translate_v3_batch_translate_text_with_model]
import os
import uuid

from google.cloud import storage
import pytest

import translate_v3_batch_translate_text_with_model

PROJECT_ID = os.environ["GCLOUD_PROJECT"]
MODEL_ID = "TRL3128559826197068699"


@pytest.fixture(scope="function")
def bucket():
    """Yield a uniquely named temporary bucket for the translation output.

    The bucket is deleted with ``force=True`` once the test has finished.
    """
    client = storage.Client()
    temp_bucket = client.create_bucket(str(uuid.uuid1()))

    yield temp_bucket

    temp_bucket.delete(force=True)


def test_batch_translate_text_with_model(capsys, bucket):
    """Run the model sample and check the character counts it prints."""
    output_uri = "gs://{}/translation/BATCH_TRANSLATION_OUTPUT/".format(
        bucket.name
    )
    translate_v3_batch_translate_text_with_model.batch_translate_text_with_model(
        "gs://cloud-samples-data/translation/custom_model_text.txt",
        output_uri,
        PROJECT_ID,
        MODEL_ID,
    )

    captured = capsys.readouterr()
    assert "Total Characters: 15" in captured.out
    assert "Translated Characters: 15" in captured.out
# [START translate_v3_batch_translate_text]
from google.cloud import translate


def batch_translate_text(
    input_uri="gs://YOUR_BUCKET_ID/path/to/your/file.txt",
    output_uri="gs://YOUR_BUCKET_ID/path/to/save/results/",
    project_id="YOUR_PROJECT_ID",
    timeout=90,
):
    """Translate a batch of texts on GCS and store the result in a GCS location.

    The input is submitted as "text/plain" and translated from English
    ("en") to Japanese ("ja"). The character counts reported by the
    finished operation are printed.

    Args:
        input_uri: Cloud Storage URI of the file to translate.
        output_uri: Cloud Storage URI prefix the results are written under.
        project_id: Google Cloud project ID.
        timeout: Value passed to ``operation.result`` to bound the wait for
            the long-running operation. Defaults to 90, the value this
            sample previously hard-coded.
    """
    client = translate.TranslationServiceClient()

    location = "us-central1"
    parent = client.location_path(project_id, location)

    # Supported file types: https://cloud.google.com/translate/docs/supported-formats
    gcs_source = {"input_uri": input_uri}
    input_configs_element = {
        "gcs_source": gcs_source,
        "mime_type": "text/plain",  # Can be "text/plain" or "text/html".
    }
    gcs_destination = {"output_uri_prefix": output_uri}
    output_config = {"gcs_destination": gcs_destination}

    # Supported language codes: https://cloud.google.com/translate/docs/languages
    operation = client.batch_translate_text(
        parent=parent,
        source_language_code="en",
        target_language_codes=["ja"],  # Up to 10 language codes here.
        input_configs=[input_configs_element],
        output_config=output_config,
    )

    print(u"Waiting for operation to complete...")
    response = operation.result(timeout)

    print(u"Total Characters: {}".format(response.total_characters))
    print(u"Translated Characters: {}".format(response.translated_characters))


# [END translate_v3_batch_translate_text]
import os
import uuid

from google.cloud import storage
import pytest

import translate_v3_batch_translate_text

PROJECT_ID = os.environ["GCLOUD_PROJECT"]


@pytest.fixture(scope="function")
def bucket():
    """Yield a uniquely named temporary bucket for the translation output.

    The bucket is deleted with ``force=True`` once the test has finished.
    """
    client = storage.Client()
    temp_bucket = client.create_bucket(str(uuid.uuid1()))

    yield temp_bucket

    temp_bucket.delete(force=True)


def test_batch_translate_text(capsys, bucket):
    """Run the batch sample and check that it prints a character total."""
    output_uri = "gs://{}/translation/BATCH_TRANSLATION_OUTPUT/".format(
        bucket.name
    )
    translate_v3_batch_translate_text.batch_translate_text(
        "gs://cloud-samples-data/translation/text.txt",
        output_uri,
        PROJECT_ID,
    )

    captured = capsys.readouterr()
    assert "Total Characters" in captured.out
# [START translate_v3_batch_translate_text_with_glossary]
from google.cloud import translate


def batch_translate_text_with_glossary(
    input_uri="gs://YOUR_BUCKET_ID/path/to/your/file.txt",
    output_uri="gs://YOUR_BUCKET_ID/path/to/save/results/",
    project_id="YOUR_PROJECT_ID",
    glossary_id="YOUR_GLOSSARY_ID",
    timeout=120,
):
    """Translate a batch of texts on GCS and store the result in a GCS location.

    A glossary is applied to the translation. The input is submitted as
    "text/plain" and translated from English ("en") to Japanese ("ja").
    The character counts reported by the finished operation are printed.

    Args:
        input_uri: Cloud Storage URI of the file to translate.
        output_uri: Cloud Storage URI prefix the results are written under.
        project_id: Google Cloud project ID.
        glossary_id: ID of the glossary, looked up in the same location
            ("us-central1") as the request.
        timeout: Value passed to ``operation.result`` to bound the wait for
            the long-running operation. Defaults to 120, the value this
            sample previously hard-coded.
    """
    client = translate.TranslationServiceClient()

    location = "us-central1"
    parent = client.location_path(project_id, location)

    # Supported file types: https://cloud.google.com/translate/docs/supported-formats
    gcs_source = {"input_uri": input_uri}
    input_configs_element = {
        "gcs_source": gcs_source,
        "mime_type": "text/plain",  # Can be "text/plain" or "text/html".
    }
    gcs_destination = {"output_uri_prefix": output_uri}
    output_config = {"gcs_destination": gcs_destination}

    # glossary is a custom dictionary Translation API uses
    # to translate the domain-specific terminology.
    # Reuse `location` so the glossary path always matches the request parent.
    glossary_path = client.glossary_path(
        project_id, location, glossary_id  # The location of the glossary
    )
    glossary_config = translate.types.TranslateTextGlossaryConfig(
        glossary=glossary_path
    )
    glossaries = {"ja": glossary_config}  # target lang as key

    # Supported language codes: https://cloud.google.com/translate/docs/languages
    operation = client.batch_translate_text(
        parent=parent,
        source_language_code="en",
        target_language_codes=["ja"],  # Up to 10 language codes here.
        input_configs=[input_configs_element],
        glossaries=glossaries,
        output_config=output_config,
    )

    print(u"Waiting for operation to complete...")
    response = operation.result(timeout)

    print(u"Total Characters: {}".format(response.total_characters))
    print(u"Translated Characters: {}".format(response.translated_characters))


# [END translate_v3_batch_translate_text_with_glossary]
import logging
import os
import uuid

from google.cloud import storage
import pytest

import translate_v3_batch_translate_text_with_glossary
import translate_v3_create_glossary
import translate_v3_delete_glossary

PROJECT_ID = os.environ["GCLOUD_PROJECT"]
GLOSSARY_INPUT_URI = "gs://cloud-samples-data/translation/glossary_ja.csv"


@pytest.fixture(scope="session")
def glossary():
    """Create a glossary for the test session and yield its ID.

    The glossary is built from GLOSSARY_INPUT_URI under a unique ID and a
    deletion is attempted when the session ends. Tests should not mutate
    or delete it themselves.
    """
    glossary_id = "must-start-with-letters-" + str(uuid.uuid1())
    translate_v3_create_glossary.create_glossary(
        PROJECT_ID, GLOSSARY_INPUT_URI, glossary_id
    )

    yield glossary_id

    try:
        translate_v3_delete_glossary.delete_glossary(PROJECT_ID, glossary_id)
    except Exception as error:
        # Cleanup stays best-effort so a failed delete never fails the run,
        # but record it so a leaked glossary can be found and removed.
        logging.getLogger(__name__).warning(
            "Could not delete glossary %s: %s", glossary_id, error
        )


@pytest.fixture(scope="function")
def bucket():
    """Yield a uniquely named temporary bucket for the translation output.

    The bucket is deleted with ``force=True`` once the test has finished.
    """
    bucket_name = str(uuid.uuid1())
    storage_client = storage.Client()
    bucket = storage_client.create_bucket(bucket_name)

    yield bucket

    bucket.delete(force=True)


def test_batch_translate_text_with_glossary(capsys, bucket, glossary):
    """Run the glossary sample and check the character total it prints."""
    translate_v3_batch_translate_text_with_glossary.batch_translate_text_with_glossary(
        "gs://cloud-samples-data/translation/text_with_glossary.txt",
        "gs://{}/translation/BATCH_TRANSLATION_OUTPUT/".format(bucket.name),
        PROJECT_ID,
        glossary,
    )

    out, _ = capsys.readouterr()
    assert "Total Characters: 9" in out