Skip to content

Commit ff5d694

Browse files
committed
WIP to merge newer with existing data
1 parent 441621a commit ff5d694

File tree

3 files changed

+59
-15
lines changed

3 files changed

+59
-15
lines changed

datauploader/api/helpers.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from datetime import datetime
22
import json
33
import os
4+
import requests
45
import tempfile
56

67

@@ -27,4 +28,8 @@ def get_existing_file_ids(oh_member):
2728
if 'Github' in file_info['metadata']['tags']:
2829
id = file_info['id']
2930
ids.append(id)
30-
return ids
31+
return ids
32+
33+
def download_to_json(download_url, timeout=30):
    """Download a JSON document and return it parsed.

    :param download_url: URL of the JSON file to fetch.
    :param timeout: seconds before the HTTP request is aborted; new
        keyword with a default, so existing callers are unchanged.
    :return: the parsed JSON payload.
    :raises requests.HTTPError: when the server answers with an error
        status — previously an error page would be fed straight into
        ``json.loads`` and fail with a confusing decode error.
    :raises requests.Timeout: when the server does not answer in time —
        previously the call could hang forever.
    """
    response = requests.get(download_url, timeout=timeout)
    response.raise_for_status()
    return json.loads(response.content)

datauploader/api/rest.py

Lines changed: 52 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22
import requests
33

44

5-
from datauploader.api.helpers import write_jsonfile_to_tmp_dir
5+
from ohapi import api
6+
7+
8+
from datauploader.api.helpers import write_jsonfile_to_tmp_dir, download_to_json
69

710
# sort order is most recently created first
811
# max page size = 100 (may not be respected or vary, but this is the max max)
@@ -17,13 +20,36 @@
1720

1821
def get_github_data(oh_access_token, gh_access_token, current_date):
    """Fetch the member's GitHub data, merged with previously synced data.

    Downloads the last synced snapshot from Open Humans (if any), asks
    the GitHub API to build data on top of it, serializes the result to
    JSON and writes it to a temp file.

    :param oh_access_token: Open Humans OAuth2 access token.
    :param gh_access_token: GitHub OAuth access token.
    :param current_date: current datetime (not used by this body; kept
        for interface compatibility with callers).
    :return: full path of the temp file the JSON was written to.
    """
    previously_synced = get_last_synced_data(oh_access_token)
    fresh_data = GithubData.from_API(gh_access_token, previously_synced)
    return write_jsonfile_to_tmp_dir('github.json', fresh_data.to_json())
2528

2629

def get_last_synced_data(oh_access_token):
    """Return the member's most recently synced GithubData, or None.

    Looks up the download URL of the latest GitHub data file on Open
    Humans; when one exists, downloads the JSON payload and rebuilds a
    GithubData object from it.

    :param oh_access_token: Open Humans OAuth2 access token.
    :return: a GithubData instance, or None when nothing was synced yet.
    """
    url = get_latest_github_file_url(oh_access_token)
    if not url:
        return None
    return GithubData.from_json(download_to_json(url))
39+
40+
def get_latest_github_file_url(oh_access_token):
    """Return the download URL of the member's newest GitHub data file.

    Exchanges the Open Humans token for the member record and scans its
    data files for the most recently updated one tagged 'Github'.

    :param oh_access_token: Open Humans OAuth2 access token.
    :return: download URL string, or None when no GitHub file exists.
    """
    member = api.exchange_oauth2_member(oh_access_token)
    download_url = None
    last_updated_at = None
    for dfile in member['data']:
        # Tag must match what get_existing_file_ids() in helpers.py
        # checks ('Github'); the original tested 'GoogleFit', a
        # copy-paste remnant that made this always return None here.
        if 'Github' in dfile['metadata']['tags']:
            # Read updated_at once with .get — the original .get-ed it
            # in the comparison but then indexed it unguarded, which
            # would raise KeyError for a file missing the field.
            updated_at = dfile['metadata'].get('updated_at', '')
            # ISO-8601 timestamps order correctly as plain strings.
            if last_updated_at is None or updated_at >= last_updated_at:
                last_updated_at = updated_at
                download_url = dfile['download_url']
    return download_url
51+
52+
2753
def get_auth_header(github_access_token):
2854
auth_header = {"Authorization": "Bearer " + github_access_token}
2955
return auth_header
@@ -71,14 +97,23 @@ def get_user_repos(github_access_token):
7197
return results
7298

7399

74-
def get_repo_commits_for_user(github_access_token, repo, username):
100+
def get_repo_commits_for_user(github_access_token, repo, username, sync_after_date):
75101
results = []
76102
cnt = 0
77103
url = GITHUB_REPO_COMMITS_ENDPOINT.format(repo, username)
78-
while(True):
104+
# commits are fetched chronologically
105+
latest_commit_date = None
106+
while True:
79107
cnt+=1
80108
response = requests.get(url, headers=get_auth_header(github_access_token))
81-
results += json.loads(response.content)
109+
commits = json.loads(response.content)
110+
111+
if latest_commit_date is None and len(commits) > 0:
112+
# github returns the data in descending chronological order
113+
# date is in the format 2014-05-09T15:14:07Z
114+
latest_commit_date = commits[0]['commit']['committer']['date']
115+
116+
results += commits
82117
# if results['type'] == 'PushEvent'
83118
# results[0]['payload']['commits'][0]['message']
84119
next = response.links.get('next')
@@ -87,7 +122,7 @@ def get_repo_commits_for_user(github_access_token, repo, username):
87122
else:
88123
url = next['url']
89124
#print("Called the api {} times".format(cnt))
90-
return results
125+
return results, latest_commit_date
91126

92127

93128
class GithubData(object):
@@ -101,8 +136,8 @@ def __repr__(self):
101136
return str(self.metadata) + '\n' + str(self.repo_data.keys())
102137

103138
@classmethod
104-
def from_API(self, token, start_dt, end_dt):
105-
# TODO handle stopping once we reach already synced data
139+
def from_API(self, token, existing_data):
140+
106141
print("Starting rate limit status:")
107142
print(get_rate_limit_remaining(token))
108143
username = get_user_info(token).get('login')
@@ -112,9 +147,12 @@ def from_API(self, token, start_dt, end_dt):
112147
repo_data = {}
113148
for repo_name in repo_names:
114149
print("Fetching commits for {}".format(repo_name))
115-
repo_commits = get_repo_commits_for_user(token, repo_name, username)
150+
# TODO handle stopping once we reach already synced data
151+
repo_commits, latest_date = get_repo_commits_for_user(token, repo_name, username, sync_after_date=None)
116152
print("Fetched {} commits".format(len(repo_commits)))
117-
repo_data[repo_name] = {"commits": repo_commits}
153+
# TODO: here, if this repo exists in the existing data, need to merge the commits
154+
repo_data[repo_name] = {"commits": repo_commits, "last_commit_date": latest_date,
155+
"num_commits": len(repo_commits)}
118156

119157
metadata = {"username": username, "num_repos": len(repos)}
120158

@@ -124,7 +162,8 @@ def from_API(self, token, start_dt, end_dt):
124162

125163
@classmethod
def from_json(cls, json_data):
    """Rebuild a GithubData instance from its to_json() representation.

    :param json_data: dict with 'repo_data' and 'metadata' keys, as
        produced by to_json().
    :return: a new GithubData instance.
    """
    # First argument renamed self -> cls per classmethod convention,
    # and cls(...) used instead of the hard-coded class name so a
    # subclass round-trips to its own type.
    return cls(repo_data=json_data['repo_data'],
               metadata=json_data['metadata'])
128167

129168
def to_json(self):
130169
return {

datauploader/tasks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def process_github(oh_id):
4646

4747
current_dt = datetime.utcnow()
4848

49-
gh_file = gh_api.get_github_data(oh_member, github_access_token, current_dt)
49+
gh_file = gh_api.get_github_data(oh_access_token, github_access_token, current_dt)
5050

5151
existing_file_ids = get_existing_file_ids(oh_member)
5252
print(existing_file_ids)

0 commit comments

Comments
 (0)