Skip to content

Commit

Permalink
handling custom ports
Browse files Browse the repository at this point in the history
  • Loading branch information
Lukasz Kujawa committed Jan 1, 2014
1 parent af0b623 commit 8859d7f
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 5 deletions.
4 changes: 2 additions & 2 deletions webcrawler/job/driller.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ Driller.prototype.execute = function(callback, $, env) {
return;
}


url = self.normaliseUrl( url, env );

self.addSourceToWebDoc( url, urls, env.task.href );

self.addSourceToWebDoc( url, urls, env.task.href );
if( self.isValidUrl( url ) ) {
var doc = new UrlDoc( url );

Expand Down
14 changes: 11 additions & 3 deletions webcrawler/storage/doc/urldoc.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,14 @@ UrlDoc.prototype.setOverwrite = function( timeDiff ) {

/**
 * Rebuilds the absolute URL of this document from its stored fields.
 *
 * The result is `protocol + '//' + hostname`, followed by `:port` when a
 * non-default port is stored, followed by `uri`.
 *
 * @returns {string} The assembled URL.
 */
UrlDoc.prototype.getUrl = function() {
var f = this.fields;
var url = f.protocol + '//' + f.hostname;
var port = f.port;

// A port may be absent (initFromUrl only assigns it when the URL carried
// one); skip it then instead of emitting "host:undefined" or a bare "host:".
var hasPort = port !== undefined && port !== null && port !== '';

// Port 80 is treated as the implicit default and is never written out.
// Number() keeps numeric and string ports ("80" vs 80) equivalent.
if( hasPort && Number( port ) !== 80 ) {
url += ':' + port;
}

url += f.uri;
return url;
};

UrlDoc.prototype.getId = function() {
Expand Down Expand Up @@ -131,10 +138,11 @@ UrlDoc.prototype.initFromUrl = function( url ) {
this.fields.protocol = parts[1];
this.fields.hostname = parts[2];
this.fields.uri = parts[4];
if( parts[3] > 0 ) {
this.fields.port = parts[3];
if( parts[3] != '' ) {
this.fields.port = parts[3].replace(':','');
}
}

}

UrlDoc.prototype.getFields = function() {
Expand Down
1 change: 1 addition & 0 deletions webcrawler/utils/urltool.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
var UrlTool = exports = module.exports = {};

UrlTool.nomalise = function( url, env, plugins ) {

/**
* attach domain name to "/article/test"
*/
Expand Down

0 comments on commit 8859d7f

Please sign in to comment.