Skip to content

Commit

Permalink
Expose target URL content type
Browse files Browse the repository at this point in the history
Added a `Scoop.targetUrlContentType` property, which keeps track of the content-type detected in `Scoop.#detectAndCaptureNonWebContent()`.

This property is exposed by `Scoop.summary()`, and therefore present in the JSON file generated by the `--json-summary-output` CLI option.
  • Loading branch information
matteocargnelutti committed May 8, 2023
1 parent 4f30842 commit f42f616
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 1 deletion.
19 changes: 18 additions & 1 deletion Scoop.js
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,13 @@ export class Scoop {
*/
targetUrlIsWebPage = true

/**
* Content-type of the target url, as detected by `Scoop.#detectAndCaptureNonWebContent()`.
* Assumed `text/html` unless detected otherwise.
* Exposed through `Scoop.summary()` as `targetUrlContentType`.
* @type {string}
*/
targetUrlContentType = 'text/html; charset=utf-8'

/**
* Current settings.
* @type {ScoopOptions}
Expand Down Expand Up @@ -624,6 +631,8 @@ export class Scoop {
* - Captures it via a curl behind our proxy
* - Sets capture state to `PARTIAL`
*
* Populates `this.targetUrlIsWebPage` and `this.targetUrlContentType`.
*
* @param {Page} page - A Playwright [Page]{@link https://playwright.dev/docs/api/class-page} object
* @returns {Promise<void>}
* @private
Expand Down Expand Up @@ -676,7 +685,13 @@ export class Scoop {
return
}

// If text/html, continue capture as normal
// Capture content-type
if (contentType) {
this.targetUrlContentType = contentType
}

// If text/html, bail from non-web content capture process.
// Scoop.capture will proceed based on the value of `this.targetUrlIsWebPage`.
if (contentType?.startsWith('text/html')) {
this.log.info('Requested URL is a web page')
return
Expand Down Expand Up @@ -1370,6 +1385,7 @@ export class Scoop {
* @property {string[]} states - Zero-indexed Scoop.states values.
* @property {string} targetUrl
* @property {boolean} targetUrlIsWebPage
* @property {string} targetUrlContentType
* @property {ScoopOptions} options
* @property {string} startedAt - ISO-formatted date
* @property {string[]} blockedRequests
Expand Down Expand Up @@ -1399,6 +1415,7 @@ export class Scoop {
states: Object.keys(Scoop.states), // So summary.states[summary.state] = 'NAME-OF-STATE'
targetUrl: this.url,
targetUrlIsWebPage: this.targetUrlIsWebPage,
targetUrlContentType: this.targetUrlContentType,
options: this.options,
startedAt: this.startedAt,
blockedRequests: [],
Expand Down
11 changes: 11 additions & 0 deletions Scoop.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ await test('Scoop - capture of a web page.', async (t) => {
const summary = await capture.summary()
assert(summary)
assert.equal(summary.targetUrl, capture.url)
assert.equal(summary.targetUrlContentType, 'text/html; charset=UTF-8')
assert.equal(summary.state, Scoop.states.COMPLETE)
assert.equal(summary.exchangeUrls.length, capture.exchanges.length)
assert.equal(summary.attachments.provenanceSummary, 'provenance-summary.html')
Expand Down Expand Up @@ -141,6 +142,16 @@ await test('Scoop - capture of a non-web resource.', async (t) => {
// assert.notEqual(html.response.body, testPdfFixture)
})

// Checks the summary generated for a capture of a non-web resource (`test.pdf`).
await t.test('Scoop.summary() returns a valid object', async (_t) => {
const capture = await Scoop.capture(`${URL}/test.pdf`, options)
const summary = await capture.summary()
assert(summary)
assert.equal(summary.targetUrl, capture.url)
// The summary must report the content type detected for the PDF.
assert.equal(summary.targetUrlContentType, 'application/pdf')
// Non-web content capture sets the capture state to `PARTIAL`.
assert.equal(summary.state, Scoop.states.PARTIAL)
assert.equal(summary.exchangeUrls.length, capture.exchanges.length)
})

/*
* TEARDOWN
*/
Expand Down

0 comments on commit f42f616

Please sign in to comment.