Skip to content

Commit

Permalink
Import projects descriptions into redis
Browse files Browse the repository at this point in the history
  • Loading branch information
anvaka committed Dec 1, 2014
1 parent 2438100 commit 27dfa9f
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 0 deletions.
10 changes: 10 additions & 0 deletions 05_export_description_stream.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
# This script will export data saved by `create_description_stream.sh`
# into gzipped CSV file in google storage.
#
# Reads PROJECT_ID, DESCRIPTION_TABLE and DESCRIPTION_BUCKET, which are
# expected to be defined by ./scripts_config.

source ./scripts_config

echo "Exporting data from $DESCRIPTION_TABLE into $DESCRIPTION_BUCKET"

# Expansions are quoted so the values reach bq verbatim: no word splitting
# and no local glob expansion of the destination URI.
bq --project_id "$PROJECT_ID" \
  extract --compression=GZIP "$DESCRIPTION_TABLE" "$DESCRIPTION_BUCKET"
8 changes: 8 additions & 0 deletions 06_download_description_stream.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
# Downloads the gzipped projects description export from google storage into
# the current directory and unpacks it.
#
# Reads DESCRIPTION_BUCKET and DESCRIPTION_FILE, which are expected to be
# defined by ./scripts_config.

source ./scripts_config

echo "Downloading projects description from $DESCRIPTION_BUCKET"
# Quoted so the URI is passed to gsutil untouched by the local shell.
gsutil cp "$DESCRIPTION_BUCKET" ./

echo "Extracting projects description"
gunzip "$DESCRIPTION_FILE"
7 changes: 7 additions & 0 deletions 07_import_description_stream_to_redis.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# Imports the unpacked projects description CSV into redis by delegating to
# import_description_to_redis.js.
#
# Reads PLAIN_DESCRIPTION_FILE, which is expected to be defined by
# ./scripts_config.

source ./scripts_config

echo "Importing projects information from $PLAIN_DESCRIPTION_FILE to redis"

# Quoted so a path containing spaces arrives in node as a single argument.
node ./import_description_to_redis.js "$PLAIN_DESCRIPTION_FILE"
47 changes: 47 additions & 0 deletions import_description_to_redis.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/**
 * This script imports a CSV file with projects information into redis.
 *
 * Usage: node import_description_to_redis.js <path-to-csv-file>
 *
 * Projects information is stored as hashes, where the key is
 * "desc:" + project name and the fields are `description` and `watchers`.
 */

var fileName = process.argv[2];
var fs = require('fs');

if (!fs.existsSync(fileName)) {
  throw new Error('Cannot find input file with csv data: ' + fileName);
}

// Stream the file that was validated above (the path given on the command
// line), not a fixed file name in the working directory.
var inputFile = fs.createReadStream(fileName),
    redis = require("redis"),
    client = redis.createClient(),
    csv = require('csv-parse'),
    parser = csv();

// Number of CSV records read so far; saveLine increments it and uses it for
// progress reporting.
var processed = 0;
parser.on('readable', saveLine);
parser.on('end', function() {
  // Stop the redis connection from keeping the process alive, so the script
  // can exit when the outstanding commands are done.
  client.unref();
});

inputFile.pipe(parser);

/**
 * 'readable' handler for the CSV parser: drains every record that is
 * currently buffered and stores each valid one in redis as a hash under
 * "desc:" + repository name with the fields `description` and `watchers`.
 *
 * Each record is read as [repository name, description, watchers]. Records
 * whose name has no '/' after its first character are counted but not saved.
 */
function saveLine() {
  var line;
  // One 'readable' event can stand for several buffered records, and read()
  // returns null once the buffer is empty (including the final event before
  // 'end'), so keep reading until null instead of assuming one record.
  while ((line = parser.read()) !== null) {
    processed += 1;
    if (processed % 10000 === 0) console.log('Saved: ', processed);

    var repo = line[0];
    // Ignore invalid repositories: the name must look like "owner/name".
    if (!repo || repo.indexOf('/') <= 0) continue;

    client.hmset('desc:' + repo,
      'description', line[1],
      'watchers', line[2],
      printError(repo));
  }
}

/**
 * Creates a callback for a redis command that reports a failed save.
 *
 * @param {string} name - repository name included in the failure message.
 * @returns {function(*, *): void} callback taking (err, res); it logs to the
 *   console when `err` is truthy and does nothing otherwise.
 */
function printError(name) {
  var reportFailure = function(err, res) {
    if (!err) {
      return;
    }
    console.log('!! Failed to save ' + name, err);
  };

  return reportFailure;
}

0 comments on commit 27dfa9f

Please sign in to comment.