Skip to content

Commit c167050

Browse files
committed
add github import for starred projects, followers, following
1 parent 45b41af commit c167050

File tree

5 files changed

+197
-26
lines changed

5 files changed

+197
-26
lines changed

pyhackers/model/cassandra/hierachy.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ def to_dict(self):
198198
def published_date(self):
199199
return format_date(self.published_at)
200200

201+
201202
class DiscussionPost(MBase):
202203
disc_id = columns.BigInt(primary_key=True)
203204
post_id = columns.BigInt(primary_key=True)
@@ -294,3 +295,48 @@ class PostReply(MBase):
294295
post_id = columns.BigInt(primary_key=True)
295296
reply_post_id = columns.BigInt(primary_key=True)
296297

298+
299+
class GithubProject(MBase):
    """Cassandra model for one GitHub repository.

    Rows are keyed by ``id``; ``full_name`` carries a secondary index so a
    repository can also be looked up by its full name.
    """

    # Key and indexed lookup column.
    id = columns.Integer(primary_key=True)
    full_name = columns.Text(index=True)

    # Descriptive repository fields.
    description = columns.Text()
    homepage = columns.Text()
    fork = columns.Boolean()
    forks_count = columns.Integer()
    language = columns.Text()
    master_branch = columns.Text()
    name = columns.Text()
    network_count = columns.Integer()
    open_issues = columns.Integer()
    url = columns.Text()
    watchers_count = columns.Integer()

    # Derived / site-specific fields.
    is_py = columns.Boolean()
    owner = columns.Integer()
    # Defaults to False when not supplied.
    hide = columns.Boolean(default=False)
316+
317+
318+
class GithubUser(MBase):
    """Cassandra model for one GitHub user profile.

    Rows are keyed by ``nick``; the numeric ``id`` carries a secondary index.
    """

    # Key and indexed lookup column.
    nick = columns.Text(primary_key=True)
    id = columns.Integer(index=True)

    # Profile fields.
    email = columns.Text()
    followers = columns.Integer()
    following = columns.Integer()
    image = columns.Text()
    blog = columns.Text()
    bio = columns.Text()
    company = columns.Text()
    location = columns.Text()
    name = columns.Text()
    url = columns.Text()
    utype = columns.Text()
    public_gists = columns.Integer()
    public_repos = columns.Integer()

    # Reference-style user info does not contain all of the fields above.
    # NOTE(review): presumably set to False for such partial records -- confirm.
    full_profile = columns.Boolean(default=True)
336+
337+
338+
class GithubUserList(MBase):
    """Cassandra model holding the per-user GitHub lists.

    One row per ``user`` (the partition key), with three lists of text values:
    starred items, users being followed, and followers.
    """

    user = columns.Text(primary_key=True)

    # Each list stores plain text entries.
    starred = columns.List(value_type=columns.Text)
    following = columns.List(value_type=columns.Text)
    followers = columns.List(value_type=columns.Text)

pyhackers/model/cassandra/management.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@ def create(cassa_key_space='pyhackers'):
5151
sync_table(DiscussionCounter)
5252
sync_table(DiscussionFollower)
5353

54+
sync_table(GithubProject)
55+
sync_table(GithubUser)
56+
sync_table(GithubUserList)
57+
5458

5559
def test_insert():
5660
from datetime import datetime as dt

pyhackers/service/user.py

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,7 @@
77
from pyhackers.model.action import Action, ActionType
88
from pyhackers.db import DB as db
99
from pyhackers.model.cassandra.hierachy import User as CsUser, UserFollower, UserFollowing, UserProject, Project, UserPost, UserDiscussion
10-
from pyhackers.apps.idgen import idgen_client
1110
from pyhackers.sentry import sentry_client
12-
import simplejson as json
1311
from pyhackers.job_scheduler import worker_queue
1412

1513

@@ -208,7 +206,6 @@ def get_user_projects_by_nick(nick):
208206
return user, os_projects
209207

210208

211-
212209
def get_user_timeline_by_nick(nick):
213210
try:
214211
user = CsUser.filter(nick=nick).first()
@@ -226,17 +223,13 @@ def get_user_timeline_by_nick(nick):
226223
def load_github_data():
227224
return
228225
access_token, config = None
229-
from github import Github
230-
g = Github(access_token,
231-
client_id=config.get("github", 'client_id'),
232-
client_secret=config.get("github", 'client_secret'), per_page=100)
226+
227+
233228

234229
user = g.get_user("mitsuhiko")
235230
#TODO: Create a task to fetch all the other information..
236231

237-
starred = user.get_starred()
238-
for s in starred:
239-
print s.full_name, s.watchers
232+
240233

241234
pub_events = user.get_public_events()
242235

pyhackers/worker/github_worker.py

Lines changed: 135 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
import logging
2+
#from cqlengine import BatchQuery
3+
from cqlengine.query import DoesNotExist
4+
from pyhackers.config import config
25
from pyhackers.model.user import User, SocialUser
3-
from pyhackers.model.cassandra.hierachy import (
4-
User as CsUser, Post as CsPost, UserPost as CsUserPost, UserFollower as CsUserFollower,
5-
UserTimeLine)
6+
from pyhackers.model.cassandra.hierachy import (GithubProject, GithubUserList, GithubUser,
7+
User as CsUser, Post as CsPost, UserPost as CsUserPost, UserFollower as CsUserFollower,
8+
UserTimeLine)
9+
from github import Github
610

711

812
class RegistrationGithubWorker():
@@ -11,22 +15,148 @@ class RegistrationGithubWorker():
1115
following users/followers
1216
"""
1317

14-
def __init__(self, user_id, social_account_id):
18+
def __init__(self, user_id, social_account_id, config):
1519
self.user_id = user_id
1620
self.social_account_id = social_account_id
21+
self.client_id = config.get("github", 'client_id')
22+
self.client_secret = config.get("github", 'client_secret')
1723
self.access_token = None
24+
self.g = None
25+
self.github_user = None
26+
self.github_user_detail = None
27+
self.users_discovered = set()
1828

1929
def get_user_details_from_db(self):
    """Load the stored access token for ``self.social_account_id``.

    Sets ``self.access_token`` from the matching ``SocialUser`` row. The
    previously issued ``User`` lookup was dropped because its result was
    never used.

    Raises:
        ValueError: if no social account exists with that id.
    """
    social_account = SocialUser.query.get(self.social_account_id)
    if social_account is None:
        # Fail with a descriptive error instead of an AttributeError on None.
        raise ValueError(
            "Social account [{}] not found".format(self.social_account_id))

    self.access_token = social_account.access_token
2333

34+
35+
def init_github(self):
    """Build the GitHub client and create the user's ``GithubUserList`` row.

    Requires ``self.access_token`` to be set beforehand.
    """
    client_options = {
        "client_id": self.client_id,
        "client_secret": self.client_secret,
        "per_page": 100,
    }
    self.g = Github(self.access_token, **client_options)

    # NOTE(review): get_user() without a login presumably returns the user
    # who owns the access token -- confirm against the PyGithub docs.
    current_user = self.g.get_user()
    self.github_user = current_user
    self.github_user_detail = GithubUserList.create(user=current_user.login)
43+
44+
def get_starred_projects(self):
    """Store every repository starred by the GitHub user.

    Each starred repository is written as a ``GithubProject`` row, its
    owner's login is added to ``self.users_discovered``, and the list of
    full repository names is saved on ``self.github_user_detail.starred``.
    """
    starred_names = []

    # TODO: consider grouping these writes in a cqlengine BatchQuery.
    for repo in self.github_user.get_starred():
        starred_names.append(repo.full_name)
        self.users_discovered.add(repo.owner.login)

        fields = dict(
            id=repo.id,
            name=repo.name,
            full_name=repo.full_name,
            watchers_count=repo.watchers,
            description=repo.description,
            homepage=repo.homepage,
            fork=repo.fork,
            forks_count=repo.forks,
            language=repo.language,
            master_branch=repo.master_branch,
            # The network count is not read from the API; always stored as 0.
            network_count=0,
            open_issues=repo.open_issues,
            url=repo.url,
            is_py=repo.language in ('python', 'Python'),
            owner=repo.owner.id,
            hide=False,
        )
        GithubProject.create(**fields)

    self.github_user_detail.starred = starred_names
    self.github_user_detail.save()
76+
77+
def get_following_users(self):
78+
following = self.github_user.get_following()
79+
following_users = []
80+
81+
for f in following:
82+
self.users_discovered.add(f.login)
83+
following_users.append(f.login)
84+
print f
85+
86+
self.github_user_detail.following = following_users
87+
self.github_user_detail.save()
88+
89+
def get_follower_users(self):
90+
followers = self.github_user.get_followers()
91+
follower_users = []
92+
93+
for f in followers:
94+
self.users_discovered.add(f.login)
95+
follower_users.append(f.login)
96+
97+
self.github_user_detail.followers = follower_users
98+
self.github_user_detail.save()
99+
100+
def save_discovered_users(self):
101+
102+
found_ids = GithubUser.objects.filter(nick__in=list(self.users_discovered))
103+
found_id_list = []
104+
105+
for user in found_ids:
106+
found_id_list.append(user.nick)
107+
108+
missing_ids = list(set(self.users_discovered) - set(found_id_list))
109+
110+
logging.warn(found_id_list)
111+
logging.warn(self.users_discovered)
112+
113+
logging.warn("[{}] users are found".format(len(self.users_discovered)))
114+
logging.warn("[{}] users are missing".format(len(missing_ids)))
115+
116+
#return
117+
118+
for nick in missing_ids:
119+
120+
user = self.g.get_user(nick)
121+
122+
logging.warn("Creating user [{}]".format(nick))
123+
124+
125+
GithubUser(nick=user.login,
126+
id=user.id,
127+
email=user.email,
128+
followers=user.followers,
129+
following=user.following,
130+
image=user.avatar_url,
131+
blog=user.blog,
132+
bio=user.bio,
133+
company=user.company,
134+
location=user.location,
135+
name=user.name,
136+
url=user.url,
137+
utype=user.type,
138+
public_repos=user.public_repos,
139+
public_gists=user.public_gists,).save()
140+
#ghuser.save()
141+
logging.warn("User[{}]created".format(nick))
142+
143+
24144
def run(self):
25145
self.get_user_details_from_db()
146+
self.init_github()
147+
self.get_starred_projects()
148+
self.get_following_users()
149+
self.get_follower_users()
150+
self.save_discovered_users()
26151
pass
27152

28153

29154
def new_github_registration(user_id, social_account_id):
    """Task entry point: run the GitHub import for a newly registered user.

    Logs the ids, then builds a ``RegistrationGithubWorker`` with the
    module-level ``config`` and runs it.
    """
    # logging.warn is a deprecated alias; logging.warning is the supported name.
    logging.warning("[TASK][new_github_registration]: [UserId:{}] [SAcc:{}]".format(user_id, social_account_id))

    RegistrationGithubWorker(user_id, social_account_id, config).run()
158+
159+
160+
if __name__ == "__main__":
    # Ad-hoc manual run with hard-coded ids; the pair (12, 5) was used before.
    new_github_registration(user_id=14, social_account_id=13)

startup.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
1-
2-
from pyhackers.app import start_app
3-
4-
start_app()
5-
6-
from pyhackers.tasks.project_finder import *
7-
8-
9-
importer("python+language:python")
10-
importer("cassandra+language:python")
11-
importer("sql+language:python")
1+
# The application is started before the worker module is imported; this
# order is kept from the original script.
from pyhackers.app import start_app
start_app()

# Import only the name this script uses instead of a wildcard import.
from pyhackers.worker.github_worker import new_github_registration

# Ad-hoc run for one (user id, social account id) pair.
# new_github_registration(14, 13)
new_github_registration(12, 5)

# Earlier project-finder job, kept disabled:
# from pyhackers.tasks.project_finder import *
# importer("python+language:python")
# importer("cassandra+language:python")
# importer("sql+language:python")

0 commit comments

Comments
 (0)