Skip to content

Commit

Permalink
Streamlined recommendations
Browse files Browse the repository at this point in the history
Still not liking its performance. Most likely will go back to redis
  • Loading branch information
anvaka committed Aug 25, 2015
1 parent a125d29 commit 1909643
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 10 deletions.
3 changes: 0 additions & 3 deletions 02_download_watch_stream.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,3 @@ gsutil cp $WATCHERS_BUCKET ./

echo "Extracting watchers information"
gunzip $WATCHERS_FILE

echo "Removing 'https://github.com/' prefix from $PLAIN_WATCHERS_FILE"
sed -i '' 's|https://github.com/||' $PLAIN_WATCHERS_FILE
5 changes: 3 additions & 2 deletions import_watchers_to_redis.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ inputFile.pipe(parser);

function saveLine() {
var line = parser.read();
var repo = line[0];
var login = line[1];
if (!line) return;
var login = line[0];
var repo = line[1];

// we want to fix twitter. Normally we should not care about it
// but in this it has to be changed, since it is so popular
Expand Down
4 changes: 2 additions & 2 deletions lib/load-watchers.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ function loadWatchers(fileName, doneCb) {
var processed = 0;
parser.on('readable', processLine);
parser.on('end', function() {
doneCb(userRepo, repoUsers);
doneCb(repoUsers, userRepo);
});

inputFile.pipe(parser);
Expand All @@ -32,7 +32,7 @@ function loadWatchers(fileName, doneCb) {
addToMap(repoUsers, repo, user);

processed += 1;
if (processed % 100000 === 0) console.log('Processed: ', processed);
if (processed % 100000 === 0) console.log('Loaded: ', processed);
}

function addToMap(map, key, value) {
Expand Down
3 changes: 3 additions & 0 deletions lib/recommend.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,14 @@ function recommend(repoName, repoMap, userMap) {
var shared = 0;
var a = followers.size < otherFollowers.size ? followers : otherFollowers;
var b = followers.size < otherFollowers.size ? otherFollowers : followers;

for (var i of a) {
if (b.has(i)) shared += 1;
}

result.push({
n: otherRepo,
// regular jaccard similarity:
r: shared / (total - shared)
});
}
Expand Down
22 changes: 22 additions & 0 deletions lib/save.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
var fs = require('fs');
var path = require('path');

module.exports = save;

/**
 * Serializes a recommendation to disk as JSON.
 *
 * The file is written to `<OUTPUT_ROOT>/<user>/<repo>.json`, where the whole
 * path is lower-cased before use.
 *
 * @param {string} projectName - project identifier in `user/repo` form.
 * @param {*} recommendation - value passed to `JSON.stringify`.
 * @param {string} [OUTPUT_ROOT='out'] - root directory for the output files.
 * @returns {string} the `projectName` that was passed in.
 */
function save(projectName, recommendation, OUTPUT_ROOT) {
  if (!OUTPUT_ROOT) OUTPUT_ROOT = 'out';

  console.log('Saving recommendation for', projectName);
  var pair = projectName.split('/');
  var user = pair[0];
  var repo = pair[1];
  var targetFile = path.join(OUTPUT_ROOT, user, repo + '.json').toLowerCase();

  // Create the directory derived from the lower-cased file path, not from the
  // original-case user name: on case-sensitive file systems those are two
  // different directories and the write below would fail with ENOENT.
  // `recursive` also creates a missing OUTPUT_ROOT and is a no-op when the
  // directory already exists.
  fs.mkdirSync(path.dirname(targetFile), { recursive: true });

  fs.writeFileSync(targetFile, JSON.stringify(recommendation));
  return projectName;
}
27 changes: 24 additions & 3 deletions recommend-node.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
var fileName = process.argv[2];
var load = require('./lib/load-watchers.js');
load('./sortedwatchers.csv', function (u, r) { global.r = r; global.u = u; });
var fileName = process.argv[2] || './watchers.csv';
var recommend = require('./lib/recommend.js');
var save = require('./lib/save.js');

var OUTPUT_ROOT = 'out';

console.log('Loading watchers information from ' + fileName);
var load = require('./lib/load-watchers.js');
load(fileName, buildRecommendations);

/**
 * Walks every repository in `repos`, logs progress, and for each repository
 * whose follower collection has a `size` of at least 50 computes its
 * recommendations and saves the first 100 of them.
 *
 * @param {Map} repos - iterated with `forEach`; each value must expose `.size`
 *   and each key is used as the repository name.
 * @param {*} users - forwarded unchanged to `recommend`.
 */
function buildRecommendations(repos, users) {
  var repoCount = repos.size;
  var seen = 0;

  repos.forEach(function (watchers, name) {
    seen += 1;
    console.log(`${seen}/${repoCount}. Analyzing ${name}...`);

    if (!(watchers.size < 50)) {
      // Enough followers: keep only the first 100 recommendations.
      var top = recommend(name, repos, users).slice(0, 100);
      save(name, top, OUTPUT_ROOT);
      return;
    }

    console.log(` > Skipping ${name} - too little followers`);
  });
}

0 comments on commit 1909643

Please sign in to comment.