Skip to content

Commit

Permalink
solr job added
Browse files: browse the repository at this point in the history
  • Loading branch information
Lukasz authored and Lukasz committed Nov 9, 2013
1 parent 67f4221 commit 290cacb
Showing 1 changed file with 52 additions and 23 deletions.
75 changes: 52 additions & 23 deletions webcrawler/job/solr.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ var solr = require('solr-client');
exports = module.exports = Solr;

/**
 * Solr indexing job.
 *
 * Stores the job options and pre-compiles every rule's filter pattern into a
 * global (`g`) RegExp, so that applyFilter can replace all occurrences without
 * recompiling the pattern for each extracted value.
 *
 * Note: the rules inside `options` are modified in place — each
 * `filter.pattern` is replaced by its compiled RegExp.
 *
 * @param {Object} options - Job configuration. `options.rules` is a collection
 *     (array or keyed object) of rules; a rule may carry an optional
 *     `filter: { pattern, replacement }`.
 */
function Solr(options) {
    var rules = options.rules,
        key,
        filter;

    // `key` is declared locally on purpose: an undeclared loop variable leaks
    // a global in sloppy mode and throws a ReferenceError in strict mode.
    for( key in rules ) {
        // Skip anything inherited through the prototype chain.
        if( ! Object.prototype.hasOwnProperty.call( rules, key ) ) {
            continue;
        }

        filter = rules[ key ].filter;

        // Loose comparison deliberately matches both null and undefined.
        if( filter != undefined ) {
            filter.pattern = new RegExp( filter.pattern, 'g' );
        }
    }

    this.options = options;
}

Expand All @@ -19,13 +26,49 @@ Solr.prototype.eachRule = function(callback) {
}
}

/**
 * Adds a document to Solr and commits it.
 *
 * Failures of either step are logged to the console together with the
 * document; they are not passed on to the caller. `callback` is invoked
 * exactly once, with no arguments, after the whole operation has finished:
 * once the commit has completed (successfully or not), or right after a
 * failed add, in which case no commit is attempted.
 *
 * @param {Object} doc - Document to index.
 * @param {Function} callback - Called without arguments when saving is done.
 */
Solr.prototype.save = function(doc, callback) {
    var client = solr.createClient();

    // Logs a failed Solr operation along with the document involved.
    function logFailure( message, err ) {
        console.log( message );
        console.log( err );
        console.log( " --------- " );
        console.log( doc );
        console.log( " --------- " );
    }

    client.add(doc, function(err) {
        if( err ) {
            logFailure( " --- Failed inserting document to Solr --- ", err );
            return callback();
        }

        // Signal completion only once the commit has finished, so the caller
        // is never told the job is done while the commit is still in flight.
        client.commit(function(err) {
            if( err ) {
                logFailure( " --- Failed commiting document to Solr --- ", err );
            }

            callback();
        });
    });
};

/**
 * Applies a rule's optional filter to an extracted value.
 *
 * When the rule has a `filter`, every match of `filter.pattern` in `str` is
 * replaced with `filter.replacement`. The value is returned unchanged when
 * the rule has no filter, or when `str` is not a string (for instance an
 * attribute lookup that produced undefined), since there is nothing to run
 * the replacement on.
 *
 * @param {*} str - Extracted value, normally a string.
 * @param {Object} rule - Rule, optionally with `filter: { pattern, replacement }`.
 * @returns {*} The filtered string, or `str` itself when no filtering applies.
 */
Solr.prototype.applyFilter = function( str, rule ) {
    // Loose comparison deliberately matches both null and undefined filters.
    if( rule.filter == undefined ) {
        return str;
    }

    // Calling .replace on a non-string would throw a TypeError; pass it through.
    if( typeof str !== 'string' ) {
        return str;
    }

    return str.replace( rule.filter.pattern, rule.filter.replacement );
};

Solr.prototype.execute = function(callback, data, env) {
if( env.res.headers['content-type'] == undefined || ! env.res.headers['content-type'].match( /^text\/html/) ) {
return callback();
}

var $ = cheerio.load( data ),
self = this,
saveDoc = false,
doc = {
id: env.task.href
};
Expand All @@ -39,6 +82,8 @@ Solr.prototype.execute = function(callback, data, env) {
var content = $(this).attr( rule.attribute );
}

content = self.applyFilter( content, rule );

if( doc[ rule.field ] == undefined ) {
doc[ rule.field ] = content;
}
Expand All @@ -48,33 +93,17 @@ Solr.prototype.execute = function(callback, data, env) {
else {
doc[ rule.field ].push( content );
}

saveDoc = true;
});
});

var client = solr.createClient();

client.add(doc, function(err, obj) {
if( err ) {
console.log( " --- Failed inserting document to Solr --- " );
console.log( err );
console.log( " --------- " );
console.log( doc );
console.log( " --------- " );
}
else {
client.commit(function(err, res) {
if( err ) {
console.log( " --- Failed commiting document to Solr --- " );
console.log( err );
console.log( " --------- " );
console.log( doc );
console.log( " --------- " );
}
});
}

if( saveDoc ) {
this.save( doc, callback );
}
else {
callback();
});
}

}

0 comments on commit 290cacb

Please sign in to comment.