Skip to content

Commit 3bd4355

Browse files
feat: implement discover route (#36)
* feat: implement `discover` route * chore(test): tidy up * chore: remove empty line * fix: strip code no longer required for helix 5
1 parent 0300475 commit 3bd4355

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+3429
-343
lines changed

src/cache/handler.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import { Response } from '@adobe/fetch';
1313
import purge, { PURGE_PREVIEW_AND_LIVE } from './purge.js';
1414

1515
/**
16-
* Allowed methods for that handler.
16+
* Allowed methods for that handler
1717
*/
1818
const ALLOWED_METHODS = ['POST'];
1919

src/code/handler.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import { Response } from '@adobe/fetch';
1313

1414
/**
15-
* Allowed methods for that handler.
15+
* Allowed methods for that handler
1616
*/
1717
const ALLOWED_METHODS = ['POST'];
1818

src/config/utils.js

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,18 @@ export async function getUserListPaths(context) {
107107
}
108108
return Array.from(paths);
109109
}
110+
111+
/**
 * Return the contents of the `.hlx.json` file in a project.
 *
 * @param {import('@adobe/helix-shared-storage').Bucket} contentBus content bus bucket
 * @param {string} contentBusId content bus id
 * @returns {Promise<any>} parsed contents of `.hlx.json`, or `null` if the
 *   bucket returns nothing for that key
 * @throws {SyntaxError} if the stored file is not valid JSON
 */
export async function fetchHlxJson(contentBus, contentBusId) {
  const key = `${contentBusId}/.hlx.json`;
  const buf = await contentBus.get(key);
  if (!buf) {
    return null;
  }
  try {
    return JSON.parse(buf.toString());
  } catch (e) {
    // keep the error type callers may rely on, but say which object was unreadable
    throw new SyntaxError(`Unable to parse ${key}: ${e.message}`, { cause: e });
  }
}

src/contentproxy/handler.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import { Response } from '@adobe/fetch';
1313
import { contentProxy } from './index.js';
1414

1515
/**
16-
* Allowed methods for that handler.
16+
* Allowed methods for that handler
1717
*/
1818
const ALLOWED_METHODS = ['GET'];
1919

src/discover/cdn-identifier.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ export async function querySiblingSites(context, info) {
5959
const { owner, repo } = info;
6060
const codeBusId = `${owner}/${repo}`;
6161

62-
const inventory = new Inventory(log, HelixStorage.fromContext(context).contentBus());
62+
const inventory = new Inventory(HelixStorage.fromContext(context).contentBus(), log);
6363
if (!await inventory.load()) {
6464
log.warn('Inventory not available');
6565
return [];

src/discover/handler.js

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/*
2+
* Copyright 2025 Adobe. All rights reserved.
3+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License. You may obtain a copy
5+
* of the License at https://www.apache.org/licenses/LICENSE-2.0
6+
*
7+
* Unless required by applicable law or agreed to in writing, software distributed under
8+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9+
* OF ANY KIND, either express or implied. See the License for the specific language
10+
* governing permissions and limitations under the License.
11+
*/
12+
import { Response } from '@adobe/fetch';
13+
import query from './query.js';
14+
import reindex from './reindex.js';
15+
import remove from './remove.js';
16+
17+
/**
 * Allowed methods for that handler
 */
const ALLOWED_METHODS = ['GET', 'POST', 'DELETE'];

/**
 * Handles the discover route.
 *
 * `GET` is delegated to `query`, `POST` to `reindex` and `DELETE` to `remove`.
 * Any other method is answered with a 405 response. `POST` and `DELETE` are
 * only dispatched after `authInfo.assertPermissions('discover:write')` has
 * been invoked.
 *
 * @param {import('../support/AdminContext').AdminContext} context context
 * @param {import('../support/RequestInfo').RequestInfo} info request info
 * @returns {Promise<Response>} response
 */
export default async function discoverHandler(context, info) {
  if (!ALLOWED_METHODS.includes(info.method)) {
    return new Response('method not allowed', {
      status: 405,
    });
  }
  if (info.method === 'GET') {
    return query(context, info);
  }

  // only the modifying methods need the auth info, so read it here
  const { attributes: { authInfo } } = context;
  authInfo.assertPermissions('discover:write');
  if (info.method === 'POST') {
    return reindex(context, info);
  }
  return remove(context, info);
}

src/discover/inventory.js

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ import { isDeepStrictEqual } from 'util';
3434
/**
3535
* Inventory path in content bus.
3636
*/
37-
const INVENTORY_PATH = '/default/inventory-v2.json';
37+
const INVENTORY_PATH = '/default/inventory.json';
3838

3939
/**
4040
* Simple inventory class
@@ -49,12 +49,12 @@ export class Inventory {
4949
};
5050

5151
/**
52-
* @type {Bucket}
52+
* @type {import('@adobe/helix-shared-storage').Bucket}
5353
*/
5454
#bucket;
5555

5656
/**
57-
* @type {Logger}
57+
* @type {any}
5858
*/
5959
#log;
6060

@@ -64,10 +64,11 @@ export class Inventory {
6464
#modified;
6565

6666
/**
67-
* @param {Logger} log
68-
* @param {Bucket} bucket
67+
* @constructs Inventory
68+
* @param {import('@adobe/helix-shared-storage').Bucket} bucket bucket
69+
* @param {any} log logger
6970
*/
70-
constructor(log, bucket) {
71+
constructor(bucket, log) {
7172
this.#log = log;
7273
this.#bucket = bucket;
7374
this.#modified = true;

src/discover/matcher/github.js

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* Copyright 2025 Adobe. All rights reserved.
3+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License. You may obtain a copy
5+
* of the License at https://www.apache.org/licenses/LICENSE-2.0
6+
*
7+
* Unless required by applicable law or agreed to in writing, software distributed under
8+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9+
* OF ANY KIND, either express or implied. See the License for the specific language
10+
* governing permissions and limitations under the License.
11+
*/
12+
13+
/**
 * Matcher that filters inventory entries against github url.
 */
export default class GithubMatcher {
  /**
   * Find the inventory entries that have the given github URL.
   *
   * The first two path segments of the URL are taken as owner and repository
   * and compared, as `owner/repo`, with the `codeBusId` of every entry.
   *
   * @param {URL} url github repository URL
   * @param {Inventory} inventory inventory of entries
   * @returns {Array} entries whose `codeBusId` equals `owner/repo`; empty if
   *   the URL has no owner or no repository segment
   */
  // eslint-disable-next-line class-methods-use-this
  filter(url, inventory) {
    const [, owner, repo] = url.pathname.split('/');
    if (!owner || !repo) {
      // nothing sensible to compare with, avoid matching on 'undefined/undefined'
      return [];
    }
    const codeBusId = `${owner}/${repo}`;
    return inventory.entries().filter((entry) => entry.codeBusId === codeBusId);
  }

  /**
   * Test whether this class can handle an URL
   *
   * @param {URL} url url to match
   * @returns {boolean} true if this class can handle the URL
   */
  static match(url) {
    return url.host === 'github.com';
  }
}

src/discover/matcher/google.js

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
/*
2+
* Copyright 2025 Adobe. All rights reserved.
3+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License. You may obtain a copy
5+
* of the License at https://www.apache.org/licenses/LICENSE-2.0
6+
*
7+
* Unless required by applicable law or agreed to in writing, software distributed under
8+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9+
* OF ANY KIND, either express or implied. See the License for the specific language
10+
* governing permissions and limitations under the License.
11+
*/
12+
import { getCachePlugin } from '@adobe/helix-shared-tokencache';
13+
import processQueue from '@adobe/helix-shared-process-queue';
14+
15+
/**
 * Build an object containing all gdrive root ids as keys.
 *
 * Entries without a (truthy) `gdriveId` are skipped; every remaining id is
 * mapped to the path `'/'`.
 *
 * @param {Array<{gdriveId?: string}>} inventory inventory of repositories
 * @returns {Object<string, string>} object with gdrive root ids as keys
 */
function buildRoots(inventory) {
  return Object.fromEntries(
    inventory
      .filter(({ gdriveId }) => !!gdriveId)
      .map(({ gdriveId }) => [gdriveId, '/']),
  );
}
29+
30+
/**
31+
* A custom user consists of a project (org/site) and a content bus id.
32+
*
33+
* @typedef CustomUser
34+
* @property {string} project
35+
* @property {string} contentBusId
36+
*/
37+
38+
/**
 * Matcher that filters inventory entries against known google drives.
 */
export default class GoogleMatcher {
  /**
   * @param {import('../../index.js').AdminContext} context context
   */
  constructor(context) {
    this.context = context;
  }

  /**
   * Return all custom users that we should use to lookup Google items.
   *
   * The projects are read from the comma separated `HLX_CUSTOM_GOOGLE_USERS`
   * environment value, each in the form `org/site` or `org/*`.
   *
   * @param {import('../inventory.js').InventoryEntry[]} entries entries
   * @returns {CustomUser[]}
   */
  #getCustomUsers(entries) {
    const { env } = this.context;
    // content bus ids already collected, so that every user is looked up once
    const seen = new Set();

    return (env.HLX_CUSTOM_GOOGLE_USERS ?? '').split(',')
      .map((project) => project.trim())
      // an unset or empty setting would otherwise yield a bogus '' project
      .filter((project) => project !== '')
      .reduce((users, project) => {
        const [org, site] = project.split('/');
        // for orgs (i.e. site = '*'), return just the first custom user
        // adorned project in that org. this avoids doing a lookup with
        // the same registered user multiple times
        const entry = entries.find((e) => !!e.customUser
          && e.org === org && (site === '*' || e.site === site));
        if (entry && !seen.has(entry.contentBusId)) {
          const { contentBusId } = entry;
          seen.add(contentBusId);
          users.push({ project: `${org}/${entry.site}`, contentBusId });
        }
        return users;
      }, []);
  }

  /**
   * Find the inventory entries that have the given google document, spreadsheet
   * or folder in their tree.
   *
   * @param {URL} url google document or spreadsheet
   * @param {Inventory} inventory inventory of entries
   * @returns {Promise<Array>} matching inventory entries, empty if none was
   *   found or the lookup failed
   */
  async filter(url, inventory) {
    const { log } = this.context;

    const segs = url.pathname.split('/');
    let id = segs.pop();
    if (id.startsWith('edit')) {
      id = segs.pop();
    }
    if (!id) {
      log.info(`Google URL contains no id: ${url}`);
      return [];
    }

    // finding the inventory items for google is a bit more tricky, as we can't match the url with
    // the mountpoint, because everything is just an ID. we need to lookup the hierarchy of the
    // item in the url; but for that we need to use the correct connected user. fortunately,
    // 99% of the projects use the default google user, so we try to resolve with that first.
    // if the item specified in the url id is not found, we need to traverse all google entries
    // with the `customUser` flag and try to load it using the entry user.

    try {
      const entries = inventory.entries();

      // trivial case, id == mountpoint
      const direct = entries.filter(({ gdriveId }) => gdriveId === id);
      if (direct.length) {
        // we don't want to support overlapping projects, so we return the ones found here
        log.info('%j', {
          discover: {
            id,
            count: direct.length,
            client: false,
          },
        });
        return direct;
      }

      const roots = buildRoots(entries);

      // looks up the hierarchy of the item with the given client and returns the
      // entries mounted at its root, or null if the client does not know the item
      const resolveWith = async (client) => {
        const hierarchy = await client.getItemsFromId(id, roots);
        if (!hierarchy.length) {
          return null;
        }
        const { id: rootId } = hierarchy[hierarchy.length - 1];
        return entries.filter(({ gdriveId }) => gdriveId === rootId);
      };

      // resolve using the default user
      const byDefaultUser = await resolveWith(await this.context.getGoogleClient());
      if (byDefaultUser) {
        log.info('%j', {
          discover: {
            id,
            count: byDefaultUser.length,
            client: true,
          },
        });
        return byDefaultUser;
      }

      // if still nothing found. find using the entries with a custom user
      let found = null;
      const customUsers = this.#getCustomUsers(entries);
      await processQueue(customUsers, async ({ project, contentBusId }) => {
        if (found) {
          return;
        }
        try {
          // client and result are local to this task: several tasks run concurrently
          const client = await this.context.getGoogleClient(contentBusId);
          const matches = await resolveWith(client);
          // another task may have finished in the meantime, keep the first result
          if (matches && !found) {
            found = matches;
            log.info('%j', {
              discover: {
                id,
                count: matches.length,
                client: true,
                project,
              },
            });
          }
        } catch (e) {
          log.info(`Unable to get items from id: ${url} in ${project}: ${e.message}`);
        }
      }, 3);
      return found ?? [];
    } catch (e) {
      log.info(`Unable to get items from id: ${url}: ${e.message}`);
      return [];
    }
  }

  /**
   * Test whether this class can handle an URL
   *
   * @param {URL} url url to match
   * @param {Inventory} inventory
   * @returns {boolean} true if this class can handle the URL
   */
  static match(url, inventory) {
    return inventory.getHostType(url.hostname) === 'google'
      || /^.*\.google\.com$/.test(url.hostname);
  }

  /**
   * Extract some data from a URL to store in the inventory.
   *
   * If the URL path ends in `/folders/<id>`, the id is stored in `entry.gdriveId`.
   * If the entry also has a `contentBusId`, the google token cache plugin is
   * looked up, and `entry.customUser` is set to `true` when its key does not
   * start with `default/.helix-auth/`. Nothing is returned, `entry` is
   * modified in place.
   *
   * @param {import('../../index.js').AdminContext} context context
   * @param {URL} url url to extract data from
   * @param {object} entry inventory entry to add the extracted data to
   * @returns {Promise<void>}
   */
  static async extract(context, url, entry) {
    const match = url.pathname.match(/\/.*\/folders\/([^?/]+)$/);
    if (!match) {
      return;
    }
    // eslint-disable-next-line no-param-reassign
    [, entry.gdriveId] = match;

    // check for custom user
    if (!entry.contentBusId) {
      return;
    }
    const { code: codeBucket, content: contentBucket } = context.attributes.bucketMap;
    const plugin = await getCachePlugin({
      log: context.log,
      contentBusId: entry.contentBusId,
      readOnly: true,
      codeBucket,
      contentBucket,
    }, 'google');
    if (!plugin.key.startsWith('default/.helix-auth/')) {
      // eslint-disable-next-line no-param-reassign
      entry.customUser = true;
    }
  }
}

0 commit comments

Comments
 (0)