Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Data Labeling Beta samples #2096

Merged
merged 13 commits into from
Apr 5, 2019
Prev Previous commit
Next Next commit
update import data and test
  • Loading branch information
dizcology committed Apr 4, 2019
commit f8720a40999bb1e7184cb649eaaf36482bd09f56
38 changes: 22 additions & 16 deletions datalabeling/export_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,45 +19,51 @@

# [START datalabeling_export_data_beta]
def export_data(dataset_resource_name, annotated_dataset_resource_name,
                export_gcs_uri):
    """Exports a dataset from the given Google Cloud project.

    Args:
        dataset_resource_name: Full resource name of the dataset to export.
        annotated_dataset_resource_name: Full resource name of the annotated
            dataset whose annotations are exported.
        export_gcs_uri: Cloud Storage URI (gs://...) the exported CSV is
            written to.
    """
    # Imported here so the sample region is self-contained.
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()

    gcs_destination = datalabeling.types.GcsDestination(
        output_uri=export_gcs_uri, mime_type='text/csv')

    output_config = datalabeling.types.OutputConfig(
        gcs_destination=gcs_destination)

    # export_data returns a long-running operation; result() blocks until the
    # export finishes. Call it once and reuse the result.
    response = client.export_data(
        dataset_resource_name, annotated_dataset_resource_name, output_config)
    result = response.result()

    print('Dataset ID: {}\n'.format(result.dataset))
    print('Output config:')
    print('\tGcs destination:')
    print('\t\tOutput URI: {}\n'.format(
        result.output_config.gcs_destination.output_uri))
# [END datalabeling_export_data_beta]

if __name__ == '__main__':
    # Command-line entry point: parse the three required flags and run the
    # export.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    parser.add_argument(
        '--dataset-resource-name',
        help='Dataset resource name. Required.',
        required=True
    )

    parser.add_argument(
        '--annotated-dataset-resource-name',
        help='Annotated Dataset resource name. Required.',
        required=True
    )

    parser.add_argument(
        '--export-gcs-uri',
        help='The export GCS URI. Required.',
        required=True
    )

    args = parser.parse_args()

    export_data(args.dataset_resource_name,
                args.annotated_dataset_resource_name,
                args.export_gcs_uri)
35 changes: 22 additions & 13 deletions datalabeling/import_data.py
Original file line number Diff line number Diff line change
def import_data(dataset_resource_name, data_type, input_gcs_uri):
    """Imports data into the given dataset and returns the operation result.

    Args:
        dataset_resource_name: Full resource name of the target dataset.
        data_type: Type of the data, e.g. 'IMAGE'.
        input_gcs_uri: Cloud Storage URI (gs://...) of the input CSV.

    Returns:
        The completed import operation's result message.
    """
    # NOTE(review): reconstructed from a diff view; the lines above
    # `client = ...` were outside the visible hunk — confirm against the
    # original file. Imported locally to keep the sample self-contained.
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()

    gcs_source = datalabeling.types.GcsSource(
        input_uri=input_gcs_uri, mime_type='text/csv')

    image_csv_input_config = datalabeling.types.InputConfig(
        data_type=data_type, gcs_source=gcs_source)

    # import_data returns a long-running operation; result() blocks until
    # the import finishes.
    response = client.import_data(dataset_resource_name,
                                  image_csv_input_config)
    result = response.result()

    # Resource name format: projects/{project_id}/datasets/{dataset_id}
    print('Dataset resource name: {}\n'.format(result.dataset))

    return result
# [END datalabeling_import_data_beta]

if __name__ == '__main__':
    # Command-line entry point: parse the three required flags.
    # (The call to import_data follows below this visible span.)
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    parser.add_argument(
        '--dataset-resource-name',
        help='Dataset resource name. Required.',
        required=True
    )

    parser.add_argument(
        '--data-type',
        help='Data type. Only support IMAGE, VIDEO, TEXT and AUDIO. Required.',
        required=True
    )

    parser.add_argument(
        '--input-gcs-uri',
        help='The GCS URI of the input dataset. Required.',
        required=True
    )

    args = parser.parse_args()

Expand Down
28 changes: 14 additions & 14 deletions datalabeling/import_data_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,22 @@
import manage_dataset

PROJECT_ID = os.getenv('GCLOUD_PROJECT')
INPUT_GCS_URI = 'gs://cloud-samples-data/datalabeling/image/image_dataset.csv'


@pytest.fixture(scope='function')
def dataset():
    """Yields a temporary dataset and deletes it after the test finishes."""
    # Setup: create a dataset in the test project.
    dataset = manage_dataset.create_dataset(PROJECT_ID)

    yield dataset

    # Teardown: delete the dataset created above, even if the test failed.
    manage_dataset.delete_dataset(dataset.name)

@pytest.mark.slow
def test_import_data(capsys, dataset):
    """Imports a sample CSV into the fixture dataset and checks the output."""
    import_data.import_data(dataset.name, 'IMAGE', INPUT_GCS_URI)
    out, _ = capsys.readouterr()
    assert 'Dataset resource name: ' in out
2 changes: 2 additions & 0 deletions datalabeling/manage_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ def create_dataset(project_id):
print('Create time:')
print('\tseconds: {}'.format(response.create_time.seconds))
print('\tnanos: {}'.format(response.create_time.nanos))

return response
# [END datalabeling_create_dataset_beta]


Expand Down