Skip to content

Commit f2be76d

Browse files
authored
Merge pull request #80 from qtomlinson/qt/auto-detect-schema-versions
Add auto detect schema versions
2 parents 72524e9 + b94cbde commit f2be76d

File tree

11 files changed

+363
-144
lines changed

11 files changed

+363
-144
lines changed

tools/integration/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
- Components to be harvested,
1616
- Base URLs for the development and production systems, along with polling interval and timeout settings,
17-
- Current harvest schema versions. This is for polling harvest results to check whether the harvest is complete. When scan tool versions are updated, these need to be updated as well.
17+
- Current harvest tools. This is used for polling harvest results to check whether the harvest is complete. When scan tools are added or removed during the harvest process, this list needs to be updated as well.
1818

1919
1. Test fixtures are grouped by endpoints at [./test/integration/fixtures](./test/integration/fixtures). You can use these fixtures to override responses from the production system when necessary.
2020
1. The classes used in the integration tests are located at [./lib](./lib). Tests for those tooling classes are located at ./test/lib. Run `npm test` to test the tooling classes.
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
// (c) Copyright 2024, SAP SE and ClearlyDefined contributors. Licensed under the MIT license.
2+
// SPDX-License-Identifier: MIT
3+
const { callFetch } = require('./fetch')
4+
5+
const defaultTools = ['licensee', 'reuse', 'scancode']
6+
7+
/**
 * Reads harvest results for a single component from a ClearlyDefined-style
 * service and decides whether a (re)harvest has completed.
 */
class HarvestResultFetcher {
  /**
   * @param {string} apiBaseUrl - base URL of the service, without a trailing slash
   * @param {string} coordinates - component coordinates, used verbatim in the request paths
   * @param {Function} [fetch] - fetch implementation; defaults to the project's callFetch
   */
  constructor(apiBaseUrl, coordinates, fetch = callFetch) {
    this.apiBaseUrl = apiBaseUrl
    this._fetch = fetch
    this._coordinates = coordinates
  }

  /**
   * Lists the harvest results available for the component and extracts the
   * tool versions for the requested tools.
   *
   * Assumes every listed entry ends in `<tool>/<toolVersion>`. The tool is matched
   * on that exact path segment rather than by substring, so coordinates whose own
   * name equals a tool name (e.g. a package called `reuse`) are not mistaken
   * for results of that tool.
   *
   * @param {string[]} [tools] - tools of interest
   * @returns {Promise<Array<[string, string]>>} `[tool, toolVersion]` pairs, grouped in the order of `tools`
   */
  async fetchToolVersions(tools = defaultTools) {
    const listHarvestResultApi = `${this.apiBaseUrl}/harvest/${this._coordinates}?form=list`
    const harvestResultUrls = await this._fetch(listHarvestResultApi).then(r => r.json())
    const listed = harvestResultUrls
      .map(url => url.split('/').slice(-2))
      .filter(segments => segments.length === 2 && segments[1] !== '')
    return tools.flatMap(tool => listed.filter(([listedTool]) => listedTool === tool))
  }

  /**
   * Runs the poller and converts polling failures into a `false` result.
   * A refused connection is rethrown, since continuing to poll is pointless then.
   *
   * @param {object} poller - object exposing `poll()`; its activity must already be set
   * @returns {Promise<boolean>} the value resolved by `poller.poll()`, or false on error
   */
  async _pollForCompletion(poller) {
    try {
      const completed = await poller.poll()
      console.log(`Completed ${this._coordinates}: ${completed}`)
      return completed
    } catch (error) {
      if (error.code === 'ECONNREFUSED') throw error
      console.log(`Error polling for ${this._coordinates}: ${error}`)
      return false
    }
  }

  /**
   * Polls until every requested tool has a harvest result fetched after
   * `startTime`, and returns the tool versions that were detected.
   *
   * A tool that is not present in the harvest list yet counts as "not complete".
   * Without this check, an empty or partial list would be reported as complete
   * and the missing tools would be dropped from the detected versions.
   *
   * @param {object} poller - object exposing `with(activity)` and `poll()`
   * @param {number} startTime - epoch milliseconds; only results fetched after it count
   * @param {string[]} [tools] - tools whose versions must be detected
   * @returns {Promise<Array<[string, string]>>} detected `[tool, toolVersion]` pairs
   * @throws {Error} when polling ends before all tools have completed
   */
  async pollForToolVersionsComplete(poller, startTime, tools = defaultTools) {
    const statuses = new Map()
    poller.with(async () => {
      const toolVersions = await this.fetchToolVersions(tools)
      const harvested = await this.isHarvestComplete(toolVersions, startTime, statuses)
      return harvested && tools.every(tool => statuses.get(tool)?.completed)
    })
    const completed = await this._pollForCompletion(poller)
    if (!completed) throw new Error(`Schema versions not detected`)
    return [...statuses.entries()].map(([k, v]) => [k, v.toolVersion])
  }

  /**
   * Polls until all given tool versions have been harvested after `startTime`.
   *
   * @param {object} poller - object exposing `with(activity)` and `poll()`
   * @param {Array<[string, string]>} toolVersions - `[tool, toolVersion]` pairs to check
   * @param {number} startTime - epoch milliseconds; only results fetched after it count
   * @returns {Promise<boolean>} true when complete, false on timeout or polling error
   */
  async pollForHarvestComplete(poller, toolVersions, startTime) {
    const statuses = new Map()
    poller.with(async () => this.isHarvestComplete(toolVersions, startTime, statuses))
    return this._pollForCompletion(poller)
  }

  /**
   * Checks each `[tool, toolVersion]` pair and records completed tools in `statuses`.
   * Tools already marked completed are not fetched again and their recorded
   * version is left untouched, so a stale version listed for the same tool cannot
   * overwrite the version that actually completed.
   *
   * @param {Array<[string, string]>} toolVersions - pairs to check; null/undefined is treated as empty
   * @param {number} startTime - epoch milliseconds; only results fetched after it count
   * @param {Map<string, {toolVersion: string, completed: boolean}>} [statuses] - completion state kept across polls
   * @returns {Promise<boolean>} true when every listed tool is completed (also true for an empty list)
   */
  async isHarvestComplete(toolVersions, startTime, statuses = new Map()) {
    const harvestChecks = (toolVersions || []).map(async ([tool, toolVersion]) => {
      if (statuses.get(tool)?.completed) return tool
      const completed = await this.isHarvestedbyTool(tool, toolVersion, startTime)
      // Re-check after the await: a parallel check for the same tool may have completed first.
      if (completed && !statuses.get(tool)?.completed) statuses.set(tool, { toolVersion, completed })
      return tool
    })
    const tools = await Promise.all(harvestChecks)
    return tools.every(tool => statuses.get(tool)?.completed)
  }

  /**
   * Tells whether the result for one tool version was fetched after `startTime`.
   *
   * @param {string} tool
   * @param {string} toolVersion
   * @param {number} [startTime] - epoch milliseconds
   * @returns {Promise<boolean>} false when the result has no `_metadata`
   */
  async isHarvestedbyTool(tool, toolVersion, startTime = 0) {
    const harvested = await this.fetchHarvestResult(tool, toolVersion)
    if (!harvested._metadata) return false
    const fetchedAt = new Date(harvested._metadata.fetchedAt)
    console.log(`${this._coordinates} ${tool}, ${toolVersion} fetched at ${fetchedAt}`)
    return fetchedAt.getTime() > startTime
  }

  /**
   * Fetches the raw harvest result for one tool version.
   *
   * @param {string} tool
   * @param {string} toolVersion
   * @returns {Promise<object>} the parsed JSON body, or `{}` when the response declares an empty body
   */
  async fetchHarvestResult(tool, toolVersion) {
    return this._fetch(`${this.apiBaseUrl}/harvest/${this._coordinates}/${tool}/${toolVersion}?form=raw`).then(r =>
      r.headers.get('Content-Length') === '0' ? Promise.resolve({}) : r.json()
    )
  }
}
77+
78+
module.exports = HarvestResultFetcher

tools/integration/lib/harvester.js

Lines changed: 31 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -2,28 +2,22 @@
22
// SPDX-License-Identifier: MIT
33

44
const { callFetch, buildPostOpts } = require('./fetch')
5-
6-
//The versions correspond to the schema versions of the tools which are used in /harvest/{type}/{provider}/{namespace}/{name}/{revision}/{tool}/{toolVersion}
7-
//See https://api.clearlydefined.io/api-docs/#/harvest/get_harvest__type___provider___namespace___name___revision___tool___toolVersion_
8-
const defaultToolChecks = [
9-
['licensee', '9.14.0'],
10-
['scancode', '32.3.0'],
11-
['reuse', '3.2.1']
12-
]
5+
const HarvestResultFetcher = require('./harvestResultFetcher')
136

147
class Harvester {
158
constructor(apiBaseUrl, harvestToolChecks, fetch = callFetch) {
169
this.apiBaseUrl = apiBaseUrl
17-
this.harvestToolChecks = harvestToolChecks || defaultToolChecks
10+
this._harvestToolChecks = harvestToolChecks
1811
this._fetch = fetch
1912
}
2013

2114
async harvest(components, reharvest = false) {
15+
if (components.length === 0) return
2216
return await this._fetch(`${this.apiBaseUrl}/harvest`, buildPostOpts(this._buildPostJson(components, reharvest)))
2317
}
2418

2519
_buildPostJson(components, reharvest) {
26-
const tool = this.harvestToolChecks.length === 1 ? this.harvestToolChecks[0][0] : 'component'
20+
const tool = this._harvestToolChecks?.length === 1 ? this._harvestToolChecks[0][0] : 'component'
2721
return components.map(coordinates => {
2822
const result = { tool, coordinates }
2923
if (reharvest) result.policy = 'always'
@@ -32,52 +26,52 @@ class Harvester {
3226
}
3327

3428
async pollForCompletion(components, poller, startTime) {
29+
if (!this._harvestToolChecks) throw new Error('Harvest tool checks not set')
3530
const status = new Map()
3631
for (const coordinates of components) {
3732
const completed = await this._pollForOneCompletion(coordinates, poller, startTime)
3833
status.set(coordinates, completed)
3934
}
4035

4136
for (const coordinates of components) {
42-
const completed =
43-
status.get(coordinates) || (await this.isHarvestComplete(coordinates, startTime).catch(() => false))
37+
const completed = status.get(coordinates) || (await this._isHarvestComplete(coordinates, startTime))
4438
status.set(coordinates, completed)
4539
}
4640
return status
4741
}
4842

4943
async _pollForOneCompletion(coordinates, poller, startTime) {
50-
try {
51-
const completed = await poller.poll(async () => this.isHarvestComplete(coordinates, startTime))
52-
console.log(`Completed ${coordinates}: ${completed}`)
53-
return completed
54-
} catch (error) {
55-
if (error.code === 'ECONNREFUSED') throw error
56-
console.log(`Error polling for ${coordinates}: ${error}`)
57-
return false
58-
}
44+
return this.resultChecker(coordinates).pollForHarvestComplete(poller, this._harvestToolChecks, startTime)
5945
}
6046

61-
async isHarvestComplete(coordinates, startTime) {
62-
const harvestChecks = this.harvestToolChecks.map(([tool, toolVersion]) =>
63-
this.isHarvestedbyTool(coordinates, tool, toolVersion, startTime)
64-
)
65-
66-
return Promise.all(harvestChecks).then(results => results.every(r => r))
47+
async _isHarvestComplete(coordinates, startTime) {
48+
return this.resultChecker(coordinates)
49+
.isHarvestComplete(this._harvestToolChecks, startTime)
50+
.catch(error => {
51+
console.log(`Error polling for ${coordinates} completion: ${error}`)
52+
return false
53+
})
6754
}
6855

69-
async isHarvestedbyTool(coordinates, tool, toolVersion, startTime = 0) {
70-
const harvested = await this.fetchHarvestResult(coordinates, tool, toolVersion)
71-
if (!harvested._metadata) return false
72-
const fetchedAt = new Date(harvested._metadata.fetchedAt)
73-
console.log(`${coordinates} ${tool}, ${toolVersion} fetched at ${fetchedAt}`)
74-
return fetchedAt.getTime() > startTime
75-
}
56+
async detectSchemaVersions(component, poller, tools) {
57+
if (!component) throw new Error('Component not set')
58+
const startTime = Date.now()
59+
//make sure that we have one entire set of harvest results (old or new)
60+
await this.harvest([component])
61+
//trigger a reharvest to overwrite the old result, so we can verify the timestamp is new for completion
62+
await this.harvest([component], true)
7663

77-
async fetchHarvestResult(coordinates, tool, toolVersion) {
78-
return this._fetch(`${this.apiBaseUrl}/harvest/${coordinates}/${tool}/${toolVersion}?form=raw`).then(r =>
79-
r.headers.get('Content-Length') === '0' ? Promise.resolve({}) : r.json()
64+
const detectedToolVersions = await this.resultChecker(component).pollForToolVersionsComplete(
65+
poller,
66+
startTime,
67+
tools
8068
)
69+
console.log(`Detected schema versions: ${detectedToolVersions}`)
70+
this._harvestToolChecks = detectedToolVersions
71+
}
72+
73+
resultChecker(coordinates) {
74+
return new HarvestResultFetcher(this.apiBaseUrl, coordinates, this._fetch)
8175
}
8276
}
8377

tools/integration/lib/poller.js

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,17 @@ class Poller {
77
this.maxTime = maxTime
88
}
99

10-
async poll(activity) {
10+
with(activity) {
11+
this._activity = activity
12+
return this
13+
}
14+
15+
async poll() {
16+
if (typeof this._activity !== 'function') throw new Error('Activity not set')
1117
let counter = 0
1218
while (counter * this.interval < this.maxTime) {
1319
console.log(`Polling ${counter}`)
14-
const isDone = await activity()
20+
const isDone = await this._activity()
1521
if (isDone) return true
1622
await new Promise(resolve => setTimeout(resolve, this.interval))
1723
counter++

tools/integration/test/integration/e2e-test-service/curationTest.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,15 @@
33

44
const { deepStrictEqual, strictEqual, ok } = require('assert')
55
const { callFetch, buildPostOpts } = require('../../../lib/fetch')
6-
const { devApiBaseUrl, components, definition } = require('../testConfig')
6+
const { devApiBaseUrl, definition } = require('../testConfig')
77

88
describe('Validate curation', function () {
99
this.timeout(definition.timeout)
1010

1111
//Rest a bit to avoid overloading the servers
1212
afterEach(() => new Promise(resolve => setTimeout(resolve, definition.timeout / 2)))
1313

14-
const coordinates = components[0]
14+
const coordinates = 'maven/mavencentral/org.apache.httpcomponents/httpcore/4.4.16'
1515

1616
describe('Propose curation', function () {
1717
const [type, provider, namespace, name, revision] = coordinates.split('/')

tools/integration/test/integration/e2e-test-service/definitionTest.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,10 @@ function filesToMap(result) {
118118

119119
async function findDefinition(coordinates) {
120120
const [type, provider, namespace, name, revision] = coordinates.split('/')
121+
let coordinatesString = `type=${type}&provider=${provider}&name=${name}`
122+
coordinatesString += namespace && namespace !== '-' ? `&namespace=${namespace}` : ''
121123
const response = await callFetch(
122-
`${devApiBaseUrl}/definitions?type=${type}&provider=${provider}&namespace=${namespace}&name=${name}&sortDesc=true&sort=revision`
124+
`${devApiBaseUrl}/definitions?${coordinatesString}&sortDesc=true&sort=revision`
123125
).then(r => r.json())
124126
return response.data.find(d => d.coordinates.revision === revision)
125127
}

tools/integration/test/integration/harvestTest.js

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,14 @@ describe('Tests for harvesting different components', function () {
1919
})
2020

2121
async function harvestTillCompletion(components) {
22-
const { harvestSchemaVersions, poll } = harvest
23-
const harvester = new Harvester(devApiBaseUrl, harvestSchemaVersions)
22+
if (components.length === 0) return new Map()
23+
24+
const { poll, tools } = harvest
25+
const harvester = new Harvester(devApiBaseUrl)
26+
27+
const oneComponent = components.shift()
28+
const versionPoller = new Poller(poll.interval / 5, poll.maxTime)
29+
await harvester.detectSchemaVersions(oneComponent, versionPoller, tools)
2430

2531
//make sure that we have one entire set of harvest results (old or new)
2632
console.log('Ensure harvest results exist before starting tests')

tools/integration/test/integration/testConfig.js

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,14 @@ const devApiBaseUrl = 'https://dev-api.clearlydefined.io'
55
const prodApiBaseUrl = 'https://api.clearlydefined.io'
66

77
const pollingInterval = 1000 * 60 * 5 // 5 minutes
8-
const pollingMaxTime = 1000 * 60 * 30 // 30 minutes
8+
const pollingMaxTime = 1000 * 60 * 60 // 60 minutes
99

10-
//Havest results to check for harvest completeness
11-
//The versions correspond to the schema versions of the tools which are used in /harvest/{type}/{provider}/{namespace}/{name}/{revision}/{tool}/{toolVersion}
12-
//See https://api.clearlydefined.io/api-docs/#/harvest/get_harvest__type___provider___namespace___name___revision___tool___toolVersion_
13-
const harvestSchemaVersions = [
14-
['licensee', '9.14.0'],
15-
['scancode', '32.3.0'],
16-
['reuse', '3.2.1']
17-
]
10+
//Harvest tools to check for harvest completeness
11+
const harvestTools = ['licensee', 'reuse', 'scancode']
1812

1913
//Components to test
2014
const components = [
15+
'pypi/pypi/-/platformdirs/4.2.0', //Keep this as the first element to test, it is relatively small
2116
'maven/mavencentral/org.apache.httpcomponents/httpcore/4.4.16',
2217
'maven/mavengoogle/android.arch.lifecycle/common/1.0.1',
2318
'maven/gradleplugin/io.github.lognet/grpc-spring-boot-starter-gradle-plugin/4.6.0',
@@ -26,7 +21,6 @@ const components = [
2621
'npm/npmjs/-/redis/0.1.0',
2722
'git/github/ratatui-org/ratatui/bcf43688ec4a13825307aef88f3cdcd007b32641',
2823
'gem/rubygems/-/sorbet/0.5.11226',
29-
'pypi/pypi/-/platformdirs/4.2.0',
3024
'pypi/pypi/-/sdbus/0.12.0',
3125
'go/golang/rsc.io/quote/v1.3.0',
3226
'nuget/nuget/-/NuGet.Protocol/6.7.1',
@@ -42,9 +36,9 @@ module.exports = {
4236
prodApiBaseUrl,
4337
components,
4438
harvest: {
45-
poll: { interval: pollingInterval, maxTime: pollingMaxTime },
46-
harvestSchemaVersions,
47-
timeout: 1000 * 60 * 60 * 2 // 2 hours for harvesting all the components
39+
poll: { interval: pollingInterval, maxTime: pollingMaxTime }, // for each component
40+
tools: harvestTools,
41+
timeout: 1000 * 60 * 60 * 4 // 4 hours for harvesting all the components
4842
},
4943
definition: {
5044
timeout: 1000 * 10 // for each component

0 commit comments

Comments
 (0)