* initial Natural Language commit
* fixed input path for skipped model test
* removed DS store, attempting linter again
* attempted linter fix
* removed lodash
* package json test directive fix, LLC fix
* dependencies for mathjs, automl in sample file added
* path fixes, project id fixes, test fixes
* comment/test cleanup
* fixed tutorial file
* manual readme update
* readme path fix model
* mefailenglishthat'sunmpossible spelling fix
* style fix for console statements
* Style fixes; thanks Ace!
* path fix!
* Fix ENV variable for project Id (GCLOUD_PROJECT)
* Language AutoML samples
* fixing lint issues
* Converting test to mocha
* Checking if Kokoro failure was a blip
1 parent 8a4c29b, commit dd9a9be
Showing 10 changed files with 1,307 additions and 0 deletions.
@@ -0,0 +1,3 @@
---
rules:
  no-console: off
393 changes: 393 additions & 0 deletions
cloud-language/snippets/automl/automlNaturalLanguageDataset.js
@@ -0,0 +1,393 @@
/**
 * Copyright 2018, Google, LLC.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * This application demonstrates how to perform basic operations on a dataset
 * with the Google AutoML Natural Language API.
 *
 * For more information, see the documentation at
 * https://cloud.google.com/natural-language/automl/docs/
 */

'use strict';

function createDataset(projectId, computeRegion, datasetName, multilabel) {
  // [START automl_natural_language_createDataset]
  const automl = require(`@google-cloud/automl`);

  const client = new automl.v1beta1.AutoMlClient();

  /**
   * TODO(developer): Uncomment the following line before running the sample.
   */
  // const projectId = `The GCLOUD_PROJECT string, e.g. "my-gcloud-project"`;
  // const computeRegion = `region-name, e.g. "us-central1"`;
  // const datasetName = `name of the dataset to create, e.g. “myDataset”`;
  // const multiLabel = `type of the classification problem, e.g “False”, “True” (multilabel)`;

  // A resource that represents Google Cloud Platform location.
  const projectLocation = client.locationPath(projectId, computeRegion);

  // Classification type is assigned based on multilabel value.
  let classificationType = `MULTICLASS`;
  if (multilabel) {
    classificationType = `MULTILABEL`;
  }

  // Set dataset name and metadata.
  const myDataset = {
    displayName: datasetName,
    textClassificationDatasetMetadata: {
      classificationType: classificationType,
    },
  };

  // Create a dataset with the dataset metadata in the region.
  client
    .createDataset({parent: projectLocation, dataset: myDataset})
    .then(responses => {
      const dataset = responses[0];

      // Display the dataset information.
      console.log(`Dataset name: ${dataset.name}`);
      console.log(`Dataset id: ${dataset.name.split(`/`).pop(-1)}`);
      console.log(`Dataset display name: ${dataset.displayName}`);
      console.log(`Dataset example count: ${dataset.exampleCount}`);
      console.log(`Text classification type:`);
      console.log(
        `\t ${dataset.textClassificationDatasetMetadata.classificationType}`
      );
      console.log(`Dataset create time:`);
      console.log(`\tseconds: ${dataset.createTime.seconds}`);
      console.log(`\tnanos: ${dataset.createTime.nanos}`);
    })
    .catch(err => {
      console.error(err);
    });
  // [END automl_natural_language_createDataset]
}

function listDatasets(projectId, computeRegion, filter) {
  // [START automl_natural_language_listDatasets]
  const automl = require(`@google-cloud/automl`);

  const client = new automl.v1beta1.AutoMlClient();

  /**
   * TODO(developer): Uncomment the following line before running the sample.
   */
  // const projectId = `The GCLOUD_PROJECT string, e.g. "my-gcloud-project"`;
  // const computeRegion = `region-name, e.g. "us-central1"`;
  // const filter = `filter expressions, must specify field e.g. “text_classification_dataset_metadata:*”`;

  // A resource that represents a Google Cloud Platform location.
  const projectLocation = client.locationPath(projectId, computeRegion);

  // List all the datasets available in the region by applying filter.
  client
    .listDatasets({parent: projectLocation, filter: filter})
    .then(responses => {
      const datasets = responses[0];

      // Display the dataset information.
      console.log(`List of datasets:`);
      datasets.forEach(dataset => {
        console.log(`Dataset name: ${dataset.name}`);
        console.log(`Dataset id: ${dataset.name.split(`/`).pop(-1)}`);
        console.log(`Dataset display name: ${dataset.displayName}`);
        console.log(`Dataset example count: ${dataset.exampleCount}`);
        console.log(`Text classification type:`);
        console.log(
          `\t ${dataset.textClassificationDatasetMetadata.classificationType}`
        );
        console.log(`Dataset create time: `);
        console.log(`\tseconds: ${dataset.createTime.seconds}`);
        console.log(`\tnanos: ${dataset.createTime.nanos}`);
        console.log(`\n`);
      });
    })
    .catch(err => {
      console.error(err);
    });
  // [END automl_natural_language_listDatasets]
}

function getDataset(projectId, computeRegion, datasetId) {
  // [START automl_natural_language_getDataset]
  const automl = require(`@google-cloud/automl`);

  const client = new automl.v1beta1.AutoMlClient();

  /**
   * TODO(developer): Uncomment the following line before running the sample.
   */
  // const projectId = `The GCLOUD_PROJECT string, e.g. "my-gcloud-project"`;
  // const computeRegion = `region-name, e.g. "us-central1"`;
  // const datasetId = `Id of the dataset`;

  // Get the full path of the dataset.
  const datasetFullId = client.datasetPath(projectId, computeRegion, datasetId);

  // Get complete detail of the dataset.
  client
    .getDataset({name: datasetFullId})
    .then(responses => {
      const dataset = responses[0];

      // Display the dataset information.
      console.log(`Dataset name: ${dataset.name}`);
      console.log(`Dataset id: ${dataset.name.split(`/`).pop(-1)}`);
      console.log(`Dataset display name: ${dataset.displayName}`);
      console.log(`Dataset example count: ${dataset.exampleCount}`);
      console.log(
        `Text classification type: ${
          dataset.textClassificationDatasetMetadata.classificationType
        }`
      );
      console.log(`Dataset create time: `);
      console.log(`\tseconds: ${dataset.createTime.seconds}`);
      console.log(`\tnanos: ${dataset.createTime.nanos}`);
    })
    .catch(err => {
      console.error(err);
    });
  // [END automl_natural_language_getDataset]
}

function importData(projectId, computeRegion, datasetId, path) {
  // [START automl_natural_language_importDataset]
  const automl = require(`@google-cloud/automl`);

  const client = new automl.v1beta1.AutoMlClient();

  /**
   * TODO(developer): Uncomment the following line before running the sample.
   */
  // const projectId = `The GCLOUD_PROJECT string, e.g. "my-gcloud-project"`;
  // const computeRegion = `region-name, e.g. "us-central1"`;
  // const datasetId = `Id of the dataset`;
  // const path = `string or array of .csv paths in AutoML Natural Language CSV format, e.g. “gs://myproject/mytraindata.csv”`;

  // Get the full path of the dataset.
  const datasetFullId = client.datasetPath(projectId, computeRegion, datasetId);

  // Get the multiple Google Cloud Storage URIs.
  const inputUris = path.split(`,`);
  const inputConfig = {
    gcsSource: {
      inputUris: inputUris,
    },
  };

  // Import the dataset from the input URI.
  client
    .importData({name: datasetFullId, inputConfig: inputConfig})
    .then(responses => {
      const operation = responses[0];
      console.log(`Processing import...`);
      return operation.promise();
    })
    .then(responses => {
      // The final result of the operation.
      if (responses[2].done === true) console.log(`Data imported.`);
    })
    .catch(err => {
      console.error(err);
    });
  // [END automl_natural_language_importDataset]
}

function exportData(projectId, computeRegion, datasetId, outputUri) {
  // [START automl_natural_language_exportDataset]
  const automl = require(`@google-cloud/automl`);

  const client = new automl.v1beta1.AutoMlClient();

  /**
   * TODO(developer): Uncomment the following line before running the sample.
   */
  // const projectId = `The GCLOUD_PROJECT string, e.g. "my-gcloud-project"`;
  // const computeRegion = `region-name, e.g. "us-central1"`;
  // const datasetId = `Id of the dataset`;
  // const outputUri = `Google Cloud Storage URI for the export directory, e.g. “gs://myproject/output”`;

  // Get the full path of the dataset.
  const datasetFullId = client.datasetPath(projectId, computeRegion, datasetId);

  // Set the output URI
  const outputConfig = {
    gcsDestination: {
      outputUriPrefix: outputUri,
    },
  };

  // Export the data to the output URI.
  client
    .exportData({name: datasetFullId, outputConfig: outputConfig})
    .then(responses => {
      const operation = responses[0];
      console.log(`Processing export...`);
      return operation.promise();
    })
    .then(responses => {
      // The final result of the operation.
      if (responses[2].done === true) console.log(`Data exported.`);
    })
    .catch(err => {
      console.error(err);
    });
  // [END automl_natural_language_exportDataset]
}

function deleteDataset(projectId, computeRegion, datasetId) {
  // [START automl_natural_language_deleteDataset]
  const automl = require(`@google-cloud/automl`);

  const client = new automl.v1beta1.AutoMlClient();

  /**
   * TODO(developer): Uncomment the following line before running the sample.
   */
  // const projectId = `The GCLOUD_PROJECT string, e.g. "my-gcloud-project"`;
  // const computeRegion = `region-name, e.g. "us-central1"`;
  // const datasetId = `Id of the dataset`;

  // Get the full path of the dataset.
  const datasetFullId = client.datasetPath(projectId, computeRegion, datasetId);

  // Delete a dataset.
  client
    .deleteDataset({name: datasetFullId})
    .then(responses => {
      const operation = responses[0];
      return operation.promise();
    })
    .then(responses => {
      // The final result of the operation.
      if (responses[2].done === true) console.log(`Dataset deleted.`);
    })
    .catch(err => {
      console.error(err);
    });
  // [END automl_natural_language_deleteDataset]
}

require(`yargs`)
  .demand(1)
  .options({
    computeRegion: {
      alias: `c`,
      type: `string`,
      default: process.env.REGION_NAME,
      requiresArg: true,
      description: `region name e.g. "us-central1"`,
    },
    datasetName: {
      alias: `n`,
      type: `string`,
      default: `testDataSet`,
      requiresArg: true,
      description: `Name of the Dataset`,
    },
    datasetId: {
      alias: `i`,
      type: `string`,
      requiresArg: true,
      description: `Id of the dataset`,
    },
    filter: {
      alias: `f`,
      default: `text_classification_dataset_metadata:*`,
      type: `string`,
      requiresArg: false,
      description: `filter expression`,
    },
    multilabel: {
      alias: `m`,
      type: `string`,
      default: false,
      requiresArg: true,
      description:
        `Type of the classification problem, ` +
        `False - MULTICLASS, True - MULTILABEL.`,
    },
    outputUri: {
      alias: `o`,
      type: `string`,
      requiresArg: true,
      description: `URI (or local path) to export dataset`,
    },
    path: {
      alias: `p`,
      type: `string`,
      global: true,
      default: `gs://nodejs-docs-samples-vcm/flowerTraindataMini.csv`,
      requiresArg: true,
      description: `URI or local path to input .csv, or array of .csv paths`,
    },
    projectId: {
      alias: `z`,
      type: `string`,
      default: process.env.GCLOUD_PROJECT,
      requiresArg: true,
      description: `The GCLOUD_PROJECT string, e.g. "my-gcloud-project"`,
    },
  })
  .command(`create-dataset`, `creates a new Dataset`, {}, opts =>
    createDataset(
      opts.projectId,
      opts.computeRegion,
      opts.datasetName,
      opts.multilabel
    )
  )
  .command(`list-datasets`, `list all Datasets`, {}, opts =>
    listDatasets(opts.projectId, opts.computeRegion, opts.filter)
  )
  .command(`get-dataset`, `Get a Dataset`, {}, opts =>
    getDataset(opts.projectId, opts.computeRegion, opts.datasetId)
  )
  .command(`delete-dataset`, `Delete a dataset`, {}, opts =>
    deleteDataset(opts.projectId, opts.computeRegion, opts.datasetId)
  )
  .command(`import-data`, `Import labeled items into dataset`, {}, opts =>
    importData(opts.projectId, opts.computeRegion, opts.datasetId, opts.path)
  )
  .command(
    `export-data`,
    `Export a dataset to a Google Cloud Storage Bucket`,
    {},
    opts =>
      exportData(
        opts.projectId,
        opts.computeRegion,
        opts.datasetId,
        opts.outputUri
      )
  )
  .example(`node $0 create-dataset -n "newDataSet"`)
  .example(`node $0 list-datasets -f "text_classification_dataset_metadata:*"`)
  .example(`node $0 get-dataset -i "DATASETID"`)
  .example(`node $0 delete-dataset -i "DATASETID"`)
  .example(
    `node $0 import-data -i "dataSetId" -p "gs://myproject/mytraindata.csv"`
  )
  .example(
    `node $0 export-data -i "dataSetId" -o "gs://myproject/outputdestination.csv"`
  )
  .wrap(120)
  .recommendCommands()
  .help()
  .strict().argv;
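
The commit message mentions converting the tests to mocha. The commit's actual test file is not shown here, but a minimal sketch of how this CLI might be exercised from a mocha test follows; the file path ./automlNaturalLanguageDataset.js, the dataset name, the five-minute timeout, and the presence of GCLOUD_PROJECT / REGION_NAME credentials in the environment are all assumptions for illustration, not part of this commit.

'use strict';

const {execSync} = require('child_process');
const assert = require('assert');

// Run the sample CLI and return its console output as a string.
const cmd = 'node ./automlNaturalLanguageDataset.js';
const run = args => execSync(`${cmd} ${args}`).toString();

describe('AutoML Natural Language dataset samples (sketch)', function() {
  // Dataset operations are long-running, so give the suite a generous timeout.
  this.timeout(300000);

  it('creates, lists and deletes a dataset', () => {
    // Create a dataset and pull its id out of the console output.
    const createOutput = run('create-dataset -n "testDataSet"');
    const idLine = createOutput
      .split('\n')
      .find(line => line.startsWith('Dataset id:'));
    assert.ok(idLine, 'expected a "Dataset id:" line in the output');
    const datasetId = idLine.split(':')[1].trim();

    // The new dataset should appear in the listing for this region.
    const listOutput = run('list-datasets');
    assert.ok(listOutput.includes(datasetId));

    // Clean up and confirm the deletion message.
    const deleteOutput = run(`delete-dataset -i "${datasetId}"`);
    assert.ok(deleteOutput.includes('Dataset deleted.'));
  });
});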