Skip to content

Commit

Permalink
Updated capture summary
Browse files Browse the repository at this point in the history
  • Loading branch information
matteocargnelutti committed May 5, 2023
1 parent 4d8a341 commit 2b65550
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 4 deletions.
67 changes: 64 additions & 3 deletions Scoop.js
Original file line number Diff line number Diff line change
Expand Up @@ -1374,17 +1374,27 @@ export class Scoop {
* @property {string} startedAt - ISO-formatted date
* @property {string[]} blockedRequests
* @property {string[]} noArchiveUrls
* @property {string[]} exchangeUrls
* @property {?string} captureIp
* @property {?string} userAgent
* @property {string[]} exchangeUrls
* @property {object} attachments
* @property {?string} attachments.provenanceSummary - Filename
* @property {?string} attachments.screenshot - Filename
* @property {?string} attachments.pdfSnapshot - Filename
* @property {?string} attachments.domSnapshot - Filename
* @property {?string} attachments.videoExtractedSummary - Filename
* @property {?string} attachments.videoExtractedMetadata - Filename
* @property {?string[]} attachments.videoExtracted - Filenames
* @property {?string[]} attachments.videoExtractedSubtitles - Filenames
* @property {?string[]} attachments.certificates - Filenames
*/

/**
* Generates and returns a summary of the current capture object, regardless of its state.
* @returns {Promise<ScoopCaptureSummary>}
*/
async summary () {
return {
const summary = {
state: this.state,
states: Scoop.states,
targetUrl: this.url,
Expand All @@ -1395,7 +1405,58 @@ export class Scoop {
noArchiveUrls: [],
captureIp: this.provenanceInfo?.captureIp,
userAgent: this.provenanceInfo?.userAgent,
exchangeUrls: this.exchanges.map(exchange => exchange.url)
exchangeUrls: this.exchanges.map(exchange => exchange.url),
attachments: {}
}

//
// Summarize attachments
//
const generatedExchanges = this.extractGeneratedExchanges()

// 1-to-1 matches:
// - Add filename to "attachments" as key if present in generated exchanges list
// - Example: attachments.provenanceSummary = "provenance-summary.html"
// Attachment keys that map to exactly one generated file, with the filename
// each one is looked up under in the generated exchanges.
const singleFileAttachments = {
  provenanceSummary: 'provenance-summary.html',
  screenshot: 'screenshot.png',
  pdfSnapshot: 'pdf-snapshot.pdf',
  domSnapshot: 'dom-snapshot.html',
  videoExtractedSummary: 'video-extracted-summary.html',
  videoExtractedMetadata: 'video-extracted-metadata.json'
}

// A key is only recorded when its file is present (truthy) in the generated
// exchanges; otherwise it is left out of "attachments" entirely.
for (const attachmentKey of Object.keys(singleFileAttachments)) {
  const expectedFilename = singleFileAttachments[attachmentKey]

  if (!generatedExchanges[expectedFilename]) {
    continue
  }

  summary.attachments[attachmentKey] = expectedFilename
}

// 1-to-many matches:
// - Videos are added to attachments.videoExtracted[]
// - Video subtitles are added to attachments.videoExtractedSubtitles[]
// - SSL certs are added to attachments.certificates[]
//
// Each list is created lazily on its first match, so a key stays absent from
// "attachments" when no file of that type was generated. The existence check
// must test the same key that is pushed to: checking a different key would
// re-create the list on every match and keep only the last filename.
for (const filename of Object.keys(generatedExchanges)) {
  if (filename.endsWith('.mp4')) {
    if (!summary.attachments.videoExtracted) {
      summary.attachments.videoExtracted = []
    }
    summary.attachments.videoExtracted.push(filename)
  }

  if (filename.endsWith('.vtt')) {
    if (!summary.attachments.videoExtractedSubtitles) {
      summary.attachments.videoExtractedSubtitles = []
    }
    summary.attachments.videoExtractedSubtitles.push(filename)
  }

  if (filename.endsWith('.pem')) {
    if (!summary.attachments.certificates) {
      summary.attachments.certificates = []
    }
    summary.attachments.certificates.push(filename)
  }
}

return summary
}
}
3 changes: 2 additions & 1 deletion Scoop.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,13 @@ await test('Scoop - capture of a web page.', async (t) => {
})

// Runs a capture, then checks that summary() mirrors the capture object:
// target URL, final state, number of exchanges, and the provenance summary attachment.
await t.test('Scoop.summary() returns a valid object', async (_t) => {
const capture = await Scoop.capture(`${URL}/test.html`, options)
// NOTE(review): `provenanceSummary: true` presumably makes the capture generate
// "provenance-summary.html", which the last assertion below relies on — confirm against Scoop options.
const capture = await Scoop.capture(`${URL}/test.html`, { ...options, provenanceSummary: true })
const summary = await capture.summary()
assert(summary)
assert.equal(summary.targetUrl, capture.url)
assert.equal(summary.state, Scoop.states.COMPLETE)
// The summary should list one URL per exchange held by the capture.
assert.equal(summary.exchangeUrls.length, capture.exchanges.length)
// The provenance summary is expected to be listed under its fixed filename.
assert.equal(summary.attachments.provenanceSummary, 'provenance-summary.html')
})

/*
Expand Down

0 comments on commit 2b65550

Please sign in to comment.