Skip to content

Commit

Permalink
Added importer script
Browse files Browse the repository at this point in the history
  • Loading branch information
endSly committed Feb 21, 2014
1 parent 0d95104 commit 943d0b5
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 0 deletions.
46 changes: 46 additions & 0 deletions importer/import.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
var jsdom = require("jsdom")
, async = require("async")
, csv = require("csv");

// Number of links written by the most recent readPage() call. readPage()
// resets and increments it; loadList() reads it to stop paginating once a
// page yields zero links.
var count = 0;

/**
 * Loads one result page with jsdom, writes a [link text, href] row to
 * `output` for every `ol li a` element, and records how many rows were
 * written in the module-level `count`.
 *
 * @param {string} page - URL of the page to load.
 * @param {{write: Function}} output - Row sink; `write` receives a two-element array.
 * @param {Function} cb - Called with no arguments on success, or with the
 *   error when the page (or jQuery) could not be loaded.
 */
function readPage(page, output, cb) {
  jsdom.env(page, ["http://code.jquery.com/jquery.js"], function (err, window) {
    // Reset first so a failed page never leaves a stale count behind.
    count = 0;

    // Without a window, or without jQuery injected into it, nothing can be
    // scraped; report the failure instead of throwing inside the callback.
    if (!window || typeof window.$ !== "function") {
      if (window && typeof window.close === "function") {
        window.close();
      }
      return cb(err || new Error("Could not load " + page));
    }

    try {
      window.$('ol li a').each(function (i, el) {
        output.write([el.innerHTML, window.$(el).attr('href')]);
        ++count;
      });
    } finally {
      // Release the DOM; otherwise every scraped page stays in memory.
      window.close();
    }

    cb();
  });
}

/**
 * Builds an async-style task that scrapes every result page of univ.cc for
 * the given domain and writes the rows to "<dom>.csv".
 *
 * Pages are requested one after another, advancing the `start` query
 * parameter by the page size, until readPage() reports a page with zero
 * links (module-level `count` is 0).
 *
 * @param {string} dom - Value for the `dom` query parameter; also the CSV base name.
 * @returns {Function} Task taking a completion callback, which is called with
 *   the error (if any) once the CSV output has been ended.
 */
function loadList(dom) {
  // The `start` offset is advanced by this amount between requests.
  var PAGE_SIZE = 50;

  return function (done) {
    var start = 1;
    var output = csv().to(dom + ".csv");
    process.stdout.write("[" + dom + "] ");

    async.doUntil(function (next) {
      var page = "http://univ.cc/search.php?dom=" + dom + "&key=&start=" + start;
      readPage(page, output, next);
    }, function () {
      // Runs after each page: move to the next offset and show progress.
      start += PAGE_SIZE;
      process.stdout.write('.');
      return count === 0;
    }, function (err) {
      // Always finish the output and the progress line, then report any
      // failure to the caller instead of dropping it.
      output.end();
      process.stdout.write('\n');
      done(err);
    });
  };
}

// Scrape the "edu" list, then the "world" list, one after the other.
async.series([
  loadList("edu"),
  loadList("world")
], function (err) {
  if (err) {
    // Report the failure but let the process exit on its own, so CSV writes
    // that are still pending are not cut off.
    console.error("Import failed:", err);
    process.exitCode = 1;
  }
});



15 changes: 15 additions & 0 deletions importer/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"name": "univcc-scrapper",
"version": "0.0.1",
"author": "Endika Gutiérrez <me@endika.net>",
"description": "univ.cc scrapper",
"dependencies" : {
"jsdom" : "0.10.1",
"async" : "*",
"csv" : "*"
},
"license": "MIT",
"engines": {
"node": ">=0.6"
}
}

0 comments on commit 943d0b5

Please sign in to comment.