22import requests
33
44
5- from datauploader .api .helpers import write_jsonfile_to_tmp_dir
5+ from ohapi import api
6+
7+
8+ from datauploader .api .helpers import write_jsonfile_to_tmp_dir , download_to_json
69
710# sort order is most recently created first
811# max page size = 100 (may not be respected or vary, but this is the max max)
1720
def get_github_data(oh_access_token, gh_access_token, current_date):
    """Fetch GitHub data for a member and write it to a temporary JSON file.

    The member's previously synced data is looked up with
    ``get_last_synced_data`` and handed to ``GithubData.from_API`` together
    with the GitHub token; the result is serialised with ``to_json`` and
    written out under the name ``github.json``.

    ``current_date`` is accepted for interface compatibility but is not used
    by this function.

    Returns the value produced by ``write_jsonfile_to_tmp_dir``.
    """
    previously_synced = get_last_synced_data(oh_access_token)
    fresh_data = GithubData.from_API(gh_access_token, previously_synced)
    return write_jsonfile_to_tmp_dir('github.json', fresh_data.to_json())
2528
2629
def get_last_synced_data(oh_access_token):
    """Load the member's most recently synced GitHub data, if any.

    Asks ``get_latest_github_file_url`` for a download URL. When one is
    returned, the file is fetched with ``download_to_json`` and turned into a
    ``GithubData`` via ``GithubData.from_json``. When no URL is available
    (falsy result), ``None`` is returned.
    """
    latest_url = get_latest_github_file_url(oh_access_token)
    if not latest_url:
        # Nothing has been synced yet for this member.
        return None
    return GithubData.from_json(download_to_json(latest_url))
39+
40+
def get_latest_github_file_url(oh_access_token):
    """Return the download URL of the member's most recently updated tagged file.

    Looks through the ``'data'`` list returned by
    ``api.exchange_oauth2_member`` and keeps the ``download_url`` of the
    matching file with the greatest ``updated_at`` metadata value. On ties,
    the later entry in the list wins.

    Returns ``None`` when no file carries the expected tag.
    """
    member = api.exchange_oauth2_member(oh_access_token)
    download_url = None
    last_updated_at = None
    for dfile in member['data']:
        metadata = dfile['metadata']
        # NOTE(review): the tag 'GoogleFit' looks copy-pasted from another
        # integration; confirm it matches the tags used when github.json is
        # uploaded, otherwise no previously synced file will ever be found.
        if 'GoogleFit' not in (metadata.get('tags') or []):
            continue
        # Read the timestamp once with a default so a file without
        # 'updated_at' cannot raise KeyError when it is selected.
        # Assumes timestamps are ISO-8601 strings, so string comparison
        # matches chronological order -- TODO confirm.
        updated_at = metadata.get('updated_at', '')
        if last_updated_at is None or updated_at >= last_updated_at:
            last_updated_at = updated_at
            download_url = dfile['download_url']
    return download_url
51+
52+
def get_auth_header(github_access_token):
    """Build the HTTP ``Authorization`` header dict for a GitHub token.

    The token is sent using the ``Bearer`` scheme.
    """
    return {"Authorization": "Bearer " + github_access_token}
@@ -71,14 +97,23 @@ def get_user_repos(github_access_token):
7197 return results
7298
7399
74- def get_repo_commits_for_user (github_access_token , repo , username ):
100+ def get_repo_commits_for_user (github_access_token , repo , username , sync_after_date ):
75101 results = []
76102 cnt = 0
77103 url = GITHUB_REPO_COMMITS_ENDPOINT .format (repo , username )
78- while (True ):
104+ # commits are fetched newest-first (descending chronological order)
105+ latest_commit_date = None
106+ while True :
79107 cnt += 1
80108 response = requests .get (url , headers = get_auth_header (github_access_token ))
81- results += json .loads (response .content )
109+ commits = json .loads (response .content )
110+
111+ if latest_commit_date is None and len (commits ) > 0 :
112+ # github returns the data in descending chronological order
113+ # date is in the format 2014-05-09T15:14:07Z
114+ latest_commit_date = commits [0 ]['commit' ]['committer' ]['date' ]
115+
116+ results += commits
82117 # if results['type'] == 'PushEvent'
83118 # results[0]['payload']['commits'][0]['message']
84119 next = response .links .get ('next' )
@@ -87,7 +122,7 @@ def get_repo_commits_for_user(github_access_token, repo, username):
87122 else :
88123 url = next ['url' ]
89124 #print("Called the api {} times".format(cnt))
90- return results
125+ return results , latest_commit_date
91126
92127
93128class GithubData (object ):
@@ -101,8 +136,8 @@ def __repr__(self):
101136 return str (self .metadata ) + '\n ' + str (self .repo_data .keys ())
102137
103138 @classmethod
104- def from_API (self , token , start_dt , end_dt ):
105- # TODO handle stopping once we reach already synced data
139+ def from_API (self , token , existing_data ):
140+
106141 print ("Starting rate limit status:" )
107142 print (get_rate_limit_remaining (token ))
108143 username = get_user_info (token ).get ('login' )
@@ -112,9 +147,12 @@ def from_API(self, token, start_dt, end_dt):
112147 repo_data = {}
113148 for repo_name in repo_names :
114149 print ("Fetching commits for {}" .format (repo_name ))
115- repo_commits = get_repo_commits_for_user (token , repo_name , username )
150+ # TODO handle stopping once we reach already synced data
151+ repo_commits , latest_date = get_repo_commits_for_user (token , repo_name , username , sync_after_date = None )
116152 print ("Fetched {} commits" .format (len (repo_commits )))
117- repo_data [repo_name ] = {"commits" : repo_commits }
153+ # TODO: here, if this repo exists in the existing data, need to merge the commits
154+ repo_data [repo_name ] = {"commits" : repo_commits , "last_commit_date" : latest_date ,
155+ "num_commits" : len (repo_commits )}
118156
119157 metadata = {"username" : username , "num_repos" : len (repos )}
120158
@@ -124,7 +162,8 @@ def from_API(self, token, start_dt, end_dt):
124162
@classmethod
def from_json(cls, json_data):
    """Alternate constructor: rebuild an instance from its JSON form.

    ``json_data`` is a mapping with the keys ``'repo_data'`` and
    ``'metadata'``; a missing key raises ``KeyError``.

    The instance is created through ``cls`` so that a subclass calling this
    constructor gets an instance of its own type.
    """
    return cls(repo_data=json_data['repo_data'],
               metadata=json_data['metadata'])
128167
129168 def to_json (self ):
130169 return {
0 commit comments