Skip to content

Commit 52b0303

Browse files
authored
Merge branch 'main' into nodejs-monitoring-migration
2 parents bd4b76f + e7c19e7 commit 52b0303

30 files changed

+1546
-0
lines changed

.github/workflows/ci.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ jobs:
2121
- uses: JustinBeckwith/linkinator-action@v1
2222
with:
2323
paths: "**/*.md"
24+
linksToSkip: "localhost"
2425
region-tags:
2526
runs-on: ubuntu-latest
2627
steps:

.github/workflows/document-ai.yaml

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
name: document-ai
2+
on:
3+
push:
4+
branches:
5+
- main
6+
paths:
7+
- 'document-ai/**'
8+
pull_request:
9+
paths:
10+
- 'document-ai/**'
11+
pull_request_target:
12+
types: [labeled]
13+
paths:
14+
- 'document-ai/**'
15+
schedule:
16+
- cron: '0 0 * * 0'
17+
jobs:
18+
test:
19+
if: ${{ github.event.action != 'labeled' || github.event.label.name == 'actions:force-run' }}
20+
runs-on: ubuntu-latest
21+
timeout-minutes: 60
22+
permissions:
23+
contents: 'write'
24+
pull-requests: 'write'
25+
id-token: 'write'
26+
steps:
27+
- uses: actions/checkout@v3.1.0
28+
with:
29+
ref: ${{github.event.pull_request.head.sha}}
30+
- uses: 'google-github-actions/auth@v1.0.0'
31+
with:
32+
workload_identity_provider: 'projects/1046198160504/locations/global/workloadIdentityPools/github-actions-pool/providers/github-actions-provider'
33+
service_account: 'kokoro-system-test@long-door-651.iam.gserviceaccount.com'
34+
create_credentials_file: 'true'
35+
access_token_lifetime: 600s
36+
- uses: actions/setup-node@v3.5.1
37+
with:
38+
node-version: 16
39+
- run: npm install
40+
working-directory: document-ai
41+
- run: npm test
42+
working-directory: document-ai
43+
env:
44+
MOCHA_REPORTER_SUITENAME: document-ai
45+
MOCHA_REPORTER_OUTPUT: document_ai_sponge_log.xml
46+
MOCHA_REPORTER: xunit
47+
- if: ${{ github.event.action == 'labeled' && github.event.label.name == 'actions:force-run' }}
48+
uses: actions/github-script@v6
49+
with:
50+
github-token: ${{ secrets.GITHUB_TOKEN }}
51+
script: |
52+
try {
53+
await github.rest.issues.removeLabel({
54+
name: 'actions:force-run',
55+
owner: 'GoogleCloudPlatform',
56+
repo: 'nodejs-docs-samples',
57+
issue_number: context.payload.pull_request.number
58+
});
59+
} catch (e) {
60+
if (!e.message.includes('Label does not exist')) {
61+
throw e;
62+
}
63+
}
64+
- if: ${{ github.event_name == 'schedule'}}
65+
run: |
66+
curl https://github.com/googleapis/repo-automation-bots/releases/download/flakybot-1.1.0/flakybot -o flakybot -s -L
67+
chmod +x ./flakybot
68+
./flakybot --repo GoogleCloudPlatform/nodejs-docs-samples --commit_hash ${{github.sha}} --build_url https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}

.github/workflows/scheduler.yaml

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
name: scheduler
2+
on:
3+
push:
4+
branches:
5+
- main
6+
paths:
7+
- 'scheduler/**'
8+
pull_request:
9+
paths:
10+
- 'scheduler/**'
11+
pull_request_target:
12+
types: [labeled]
13+
schedule:
14+
- cron: '0 0 * * 0'
15+
jobs:
16+
test:
17+
if: ${{ github.event.action != 'labeled' || github.event.label.name == 'actions:force-run' }}
18+
runs-on: ubuntu-latest
19+
timeout-minutes: 60
20+
permissions:
21+
contents: 'write'
22+
pull-requests: 'write'
23+
id-token: 'write'
24+
steps:
25+
- uses: actions/checkout@v3.1.0
26+
with:
27+
ref: ${{github.event.pull_request.head.ref}}
28+
repository: ${{github.event.pull_request.head.repo.full_name}}
29+
- uses: 'google-github-actions/auth@v1.0.0'
30+
with:
31+
workload_identity_provider: 'projects/1046198160504/locations/global/workloadIdentityPools/github-actions-pool/providers/github-actions-provider'
32+
service_account: 'kokoro-system-test@long-door-651.iam.gserviceaccount.com'
33+
create_credentials_file: 'true'
34+
access_token_lifetime: 600s
35+
- uses: actions/setup-node@v3.5.1
36+
with:
37+
node-version: 16
38+
- run: npm install
39+
working-directory: scheduler
40+
- run: npm test
41+
working-directory: scheduler
42+
env:
43+
MOCHA_REPORTER_SUITENAME: scheduler
44+
MOCHA_REPORTER_OUTPUT: scheduler_sponge_log.xml
45+
MOCHA_REPORTER: xunit
46+
- if: ${{ github.event.action == 'labeled' && github.event.label.name == 'actions:force-run' }}
47+
uses: actions/github-script@v6
48+
with:
49+
github-token: ${{ secrets.GITHUB_TOKEN }}
50+
script: |
51+
try {
52+
await github.rest.issues.removeLabel({
53+
name: 'actions:force-run',
54+
owner: 'GoogleCloudPlatform',
55+
repo: 'nodejs-docs-samples',
56+
issue_number: context.payload.pull_request.number
57+
});
58+
} catch (e) {
59+
if (!e.message.includes('Label does not exist')) {
60+
throw e;
61+
}
62+
}
63+
- if: ${{ github.event_name == 'schedule'}}
64+
run: |
65+
curl https://github.com/googleapis/repo-automation-bots/releases/download/flakybot-1.1.0/flakybot -o flakybot -s -L
66+
chmod +x ./flakybot
67+
./flakybot --repo GoogleCloudPlatform/nodejs-docs-samples --commit_hash ${{github.sha}} --build_url https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}

.github/workflows/workflows.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
"datalabeling",
3030
"datastore/functions",
3131
"datacatalog/quickstart",
32+
"document-ai",
3233
"endpoints/getting-started",
3334
"endpoints/getting-started-grpc",
3435
"error-reporting",
@@ -53,6 +54,7 @@
5354
"datacatalog/cloud-client",
5455
"datacatalog/quickstart",
5556
"datastore/functions",
57+
"scheduler",
5658
"talent",
5759
"contact-center-insights",
5860
"workflows"

document-ai/.eslintrc.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
rules:
3+
no-console: off
4+
node/no-unsupported-features/node-builtins: off
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
/**
2+
* Copyright 2020 Google LLC
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
'use strict';
17+
18+
const uuid = require('uuid');
19+
20+
/**
 * Document AI batch-processing sample.
 *
 * Submits one PDF stored in Cloud Storage to a Document AI processor as a
 * batch (long-running) operation, waits for it to finish, then downloads the
 * JSON result files from the output bucket and prints the paragraphs and form
 * key/value pairs found on the first page of each result.
 *
 * All arguments are positional and are taken from the command line
 * (`process.argv.slice(2)`), falling back to the placeholder defaults below.
 *
 * @param {string} projectId Google Cloud project ID.
 * @param {string} location Processor location (the sample notes 'us' or 'eu').
 * @param {string} processorId ID of a processor created in the Cloud Console.
 * @param {string} gcsInputUri URI of the source PDF.
 * @param {string} gcsOutputUri Output bucket; used both as the root of the
 *     batch output `gcsUri` and as the argument to `storage.bucket()`.
 * @param {string} gcsOutputUriPrefix Object-name prefix for the results;
 *     defaults to a fresh UUID so separate runs do not collide.
 * @returns {Promise<void>} Settles once every result file has been printed;
 *     rejects if the batch operation or any download/parse step fails.
 */
async function main(
  projectId = 'YOUR_PROJECT_ID',
  location = 'YOUR_PROJECT_LOCATION',
  processorId = 'YOUR_PROCESSOR_ID', // Create this in the Cloud Console
  gcsInputUri = 'gs://cloud-samples-data/documentai/invoice.pdf',
  gcsOutputUri = 'output-bucket',
  gcsOutputUriPrefix = uuid.v4()
) {
  // [START documentai_batch_process_document]
  /**
   * TODO(developer): Uncomment these variables before running the sample.
   */
  // const projectId = 'YOUR_PROJECT_ID';
  // const location = 'YOUR_PROJECT_LOCATION'; // Format is 'us' or 'eu'
  // const processorId = 'YOUR_PROCESSOR_ID';
  // const gcsInputUri = 'YOUR_SOURCE_PDF';
  // const gcsOutputUri = 'YOUR_STORAGE_BUCKET';
  // const gcsOutputUriPrefix = 'YOUR_STORAGE_PREFIX';

  // Imports the Google Cloud client library
  const {DocumentProcessorServiceClient} =
    require('@google-cloud/documentai').v1;
  const {Storage} = require('@google-cloud/storage');

  // Instantiates Document AI, Storage clients
  const client = new DocumentProcessorServiceClient();
  const storage = new Storage();

  const {default: PQueue} = require('p-queue');

  async function batchProcessDocument() {
    const name = `projects/${projectId}/locations/${location}/processors/${processorId}`;

    // Configure the batch process request.
    const request = {
      name,
      inputDocuments: {
        gcsDocuments: {
          documents: [
            {
              gcsUri: gcsInputUri,
              mimeType: 'application/pdf',
            },
          ],
        },
      },
      documentOutputConfig: {
        gcsOutputConfig: {
          gcsUri: `${gcsOutputUri}/${gcsOutputUriPrefix}/`,
        },
      },
    };

    // Batch process document using a long-running operation.
    // You can wait for now, or get results later.
    // Note: first request to the service takes longer than subsequent
    // requests.
    const [operation] = await client.batchProcessDocuments(request);

    // Wait for operation to complete.
    await operation.promise();
    console.log('Document processing complete.');

    // Query Storage bucket for the results file(s).
    const query = {
      prefix: gcsOutputUriPrefix,
    };

    console.log('Fetching results ...');

    // List all of the files in the Storage bucket
    const [files] = await storage.bucket(gcsOutputUri).getFiles(query);

    // Add all asynchronous downloads to queue for execution.
    const queue = new PQueue({concurrency: 15});
    const tasks = files.map((fileInfo, index) => async () => {
      // Get the file as a buffer
      const [file] = await fileInfo.download();

      console.log(`Fetched file #${index + 1}:`);

      // The results stored in the output Storage location
      // are formatted as a document object.
      const document = JSON.parse(file.toString());
      const {text} = document;

      // Extract shards from the text field. An anchor may reference several
      // segments of the document text, so join all of them rather than
      // returning only the first one.
      const getText = textAnchor => {
        if (
          !textAnchor ||
          !textAnchor.textSegments ||
          textAnchor.textSegments.length === 0
        ) {
          return '';
        }

        return textAnchor.textSegments
          .map(segment =>
            // First shard in document doesn't have startIndex property
            text.substring(segment.startIndex || 0, segment.endIndex)
          )
          .join('');
      };

      // Only the first page is reported. Fall back to an empty page so a
      // result file without pages prints nothing instead of throwing.
      const [page1 = {}] = document.pages || [];

      // Either list may be missing from the output (for example, form fields
      // are only produced by a form processor), so default both to empty.
      const {paragraphs = [], formFields = []} = page1;

      // Read the text recognition output from the processor
      console.log('The document contains the following paragraphs:');

      for (const paragraph of paragraphs) {
        const paragraphText = getText(paragraph.layout.textAnchor);
        console.log(`Paragraph text:\n${paragraphText}`);
      }

      // Form parsing provides additional output about
      // form-formatted PDFs. You must create a form
      // processor in the Cloud Console to see full field details.
      console.log('\nThe following form key/value pairs were detected:');

      for (const field of formFields) {
        const fieldName = getText(field.fieldName.textAnchor);
        const fieldValue = getText(field.fieldValue.textAnchor);

        console.log('Extracted key value pair:');
        console.log(`\t(${fieldName}, ${fieldValue})`);
      }
    });
    await queue.addAll(tasks);
  }
  // [END documentai_batch_process_document]

  // Await the sample so a failure rejects main() instead of being left as a
  // floating promise.
  await batchProcessDocument();
}

// Report any failure and exit non-zero so callers (tests, CI) can detect it.
main(...process.argv.slice(2)).catch(err => {
  console.error(err);
  process.exitCode = 1;
});

document-ai/package.json

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
{
2+
"name": "nodejs-document-ai-samples",
3+
"private": true,
4+
"license": "Apache-2.0",
5+
"author": "Google LLC",
6+
"engines": {
7+
"node": ">=12.0.0"
8+
},
9+
"files": [
10+
"*.js"
11+
],
12+
"scripts": {
13+
"test": "mocha test/*.js --timeout 600000"
14+
},
15+
"dependencies": {
16+
"@google-cloud/documentai": "^6.1.0",
17+
"@google-cloud/storage": "^6.0.0",
18+
"p-queue": "^6.6.2",
19+
"uuid": "^9.0.0"
20+
},
21+
"devDependencies": {
22+
"chai": "^4.2.0",
23+
"mocha": "^8.0.0"
24+
}
25+
}

0 commit comments

Comments
 (0)