Skip to content

Commit

Permalink
Merge pull request #8 from geoblink/bugfix/job-error-callbacks
Browse files Browse the repository at this point in the history
Added callback usage on Scraper job errors
  • Loading branch information
Héctor Del Campo authored Sep 17, 2019
2 parents aa28193 + 198bd68 commit 80d925c
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 12 deletions.
4 changes: 3 additions & 1 deletion extension/scripts/Scraper.js
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,9 @@ Scraper.prototype = {
job.execute(this.browser, function (err, job) {
if (err) {
// jobs don't seem to return anything
return console.error('Error in job', err)
console.error('Error in job', err)
this.executionCallback(err)
return
}
debug('finished executing')
var scrapedRecords = []
Expand Down
9 changes: 6 additions & 3 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,12 @@ function scrape (sitemapInfo, options = {}) {
store,
delay: options.delay || 500
}, {})
s.run(function () {
// TODO there should be some error handling here
resolve(store.data)
s.run(function (err) {
if (err) {
reject(err)
} else {
resolve(store.data)
}
})
})
}
6 changes: 3 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "web-scraper-headless",
"version": "1.0.6",
"version": "1.0.7",
"description": "Web Scraper Headless allows to extract data from web pages using plans (sitemaps) created with the Web Scraper browser extension. Using these sitemaps the Web Scraper will navigate the site accordingly and extract all data. Scraped data later can be exported as CSV.",
"main": "index.js",
"directories": {
Expand Down Expand Up @@ -63,7 +63,7 @@
"karma-mocha": "^1.3.0",
"mocha": "^3.2.0",
"npm-watch": "^0.1.8",
"sinon": "^5.0.10",
"sinon": "^7.4.2",
"standard": "^9.0.2",
"vinyl-buffer": "^1.0.0",
"vinyl-source-stream": "^1.1.0",
Expand All @@ -77,6 +77,6 @@
"jquery": "^3.2.1",
"jquery-deferred": "^0.3.1",
"jsdom": "^10.1.0",
"puppeteer": "^1.4.0"
"puppeteer": "1.5.0"
}
}
54 changes: 49 additions & 5 deletions tests/spec/ScraperSpec.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,18 @@ const assert = require('chai').assert
const Sitemap = require('./../../extension/scripts/Sitemap')
const FakeStore = require('./../FakeStore')
const Scraper = require('./../../extension/scripts/Scraper')
const Job = require('./../../extension/scripts/Job')
const utils = require('./../utils')
const globals = require('../globals')
const sinon = require('sinon')

describe('Scraper', function () {
var q, store, $el
let q, store, $el
let $
let document
let window
let Browser

const sandbox = sinon.createSandbox()
beforeEach(function () {
$ = globals.$
document = globals.document
Expand All @@ -27,10 +29,53 @@ describe('Scraper', function () {
// Empty the document body between tests so DOM nodes created by one
// test can never leak into the next one.
afterEach(function () {
  const body = document.body
  while (body.firstChild) {
    body.removeChild(body.firstChild)
  }
})
// Undo every stub/spy registered on the shared sinon sandbox after each test.
afterEach(() => sandbox.restore())

// Verifies that an error raised while executing a job is propagated to the
// callback passed to Scraper#run instead of being swallowed.
it('Should handle error on job', function (done) {
  // Force every job execution to fail with a synthetic error.
  sandbox.stub(Job.prototype, 'execute')
    .callsFake(function (params, callback) {
      callback(new Error('Fake error test'))
    })

  const sitemap = new Sitemap({
    id: 'test',
    startUrl: 'http://test.lv/',
    selectors: [
      {
        id: 'a',
        selector: '#scraper-test-one-page a',
        multiple: false,
        type: 'SelectorText',
        parentSelectors: [
          '_root'
        ]
      }
    ]
  }, {$, document, window})

  // Never reassigned, so declare as const (was `let`).
  const browser = new Browser({
    pageLoadDelay: 100
  })

  const s = new Scraper({
    queue: q,
    sitemap: sitemap,
    browser: browser,
    store: store
  }, {$, document, window})

  // The run callback must receive the stubbed error; completing without an
  // error means the bug regressed, so fail the test in that branch.
  s.run(function (err) {
    if (err) {
      done()
    } else {
      done(new Error('Test should have failed'))
    }
  })
})


it('should be able to scrape one page', function (done) {
var b = document.querySelector('#scraper-test-one-page a')
console.log(b)
var sitemap = new Sitemap({
id: 'test',
startUrl: 'http://test.lv/',
Expand Down Expand Up @@ -206,7 +251,6 @@ describe('Scraper', function () {
var image = Scraper.prototype.getFileFilename('image.jpg')
assert.equal(image, 'image.jpg')
})

// Not very clear what should jsdom do in this case
it.skip('should store images', function (done) {
var record = {
Expand Down

0 comments on commit 80d925c

Please sign in to comment.