Skip to content

Commit 03b4d9f

Browse files
committed
Changes scrapper tasks to load any users public data
1 parent 59fc8e7 commit 03b4d9f

File tree

2 files changed

+68
-13
lines changed

2 files changed

+68
-13
lines changed

README.md

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,27 @@ $ rails s
1818
```
1919

2020
## Scraping github data
21+
The following steps are necessary to connect to GitHub’s API to scrape and store Repository and User data.
2122

22-
This steps are necessary to connect to GitHub’s API to scrape and store Repository and User data:
23+
### Fetching current user data
2324

2425
```sh
2526
$ cd github_api
26-
$ rake "github:fetch[<GITHUB_ACCESS_TOKEN>]"
27+
$ rake "github:fetch_me" GITHUB_ACCESS_TOKEN=your_top_secret_access_token
2728
```
2829

30+
### Fetching other users' public data
31+
32+
```sh
33+
$ cd github_api
34+
$ rake "github:fetch[felipetio,fabiofleitas]" GITHUB_ACCESS_TOKEN=your_top_secret_access_token
35+
```
2936
### Cronjobs
3037
To schedule cronjobs to scrape more data, you can add a `.env` file with your token to protect it.
3138
`GITHUB_ACCESS_TOKEN=your_top_secret_access_token`
3239

3340
Now you can add the following command to your crontab:
34-
`cd path/to/github_api && bundle exec rake github:fetch`
41+
`cd path/to/github_api && bundle exec rake github:fetch_me`
3542

3643
## Query Format
3744

lib/tasks/github.rake

Lines changed: 58 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,71 @@
11
namespace :github do
2-
desc "Fetch github data and store it locally"
3-
task :fetch, [:github_access_token] => :environment do |task, args|
4-
github_access_token = args[:github_access_token] || ENV["GITHUB_ACCESS_TOKEN"]
2+
desc "Fetch github data from current user and store it locally"
3+
task :fetch_me => :environment do |task, args|
4+
github_access_token ENV["GITHUB_ACCESS_TOKEN"]
5+
56
validate_arguments(github_access_token)
6-
7-
client = Octokit::Client.new(:access_token => github_access_token)
87

9-
User.first_or_create(client.user.to_hash)
10-
client.repositories.each {|r| Repository.first_or_create(r.to_hash) }
8+
connect! github_access_token
9+
10+
fetch_user(@client.user)
11+
12+
fetch_repos(@client.user)
13+
14+
Rails.logger.info("Success fetch github data!")
15+
rescue Octokit::Unauthorized
16+
Rails.logger.error("Unable to fetch github data, ensure github_access_token is valid.")
17+
end
18+
19+
desc "Fetch github data from a list of users and store it locally (`github:fetch[felipetio,fabiofleitas]`)"
20+
task :fetch => :environment do |task, args|
21+
connect! ENV["GITHUB_ACCESS_TOKEN"]
22+
23+
if logins = args.extras and logins.empty?
24+
raise ArgumentError, "At least one user login must be passed via argumments"
25+
end
26+
27+
logins.each do |login|
28+
user = fetch_user(login)
29+
fetch_repos(user)
30+
end
1131

1232
Rails.logger.info("Success fetch github data!")
1333
rescue Octokit::Unauthorized
1434
Rails.logger.error("Unable to fetch github data, ensure github_access_token is valid.")
1535
end
1636

1737
private
18-
def validate_arguments(github_access_token)
19-
if not github_access_token.is_a?(String) or github_access_token.empty?
20-
raise ArgumentError, "A valid github_access_token must be passed via argumment or ENV"
38+
39+
def connect!(github_access_token)
40+
if not github_access_token.present?
41+
raise ArgumentError, "A valid github_access_token must be passed via ENV[GITHUB_ACCESS_TOKEN]"
42+
end
43+
44+
@client = Octokit::Client.new(
45+
access_token: github_access_token,
46+
auto_paginate: true
47+
)
48+
end
49+
50+
def fetch_user(user)
51+
user = user.is_a?(String) ? @client.user(user) : user
52+
Rails.logger.info("Fetching user #{user.login}")
53+
User.where(id: user.id).first_or_create(user.to_hash)
54+
55+
user
56+
end
57+
58+
def fetch_repos(user)
59+
last_response = user.rels[:repos].get
60+
61+
while true do
62+
last_response.data.each do |repo|
63+
Rails.logger.info("Fetching repo #{repo.name}")
64+
Repository.where(id: repo.id).first_or_create(repo.to_hash)
65+
end
66+
67+
break unless last_response.rels[:next]
68+
last_response = last_response.rels[:next].get
2169
end
2270
end
2371
end

0 commit comments

Comments
 (0)