Skip to content

Commit be981c4

Browse files
committed
init
0 parents  commit be981c4

File tree

4 files changed

+200
-0
lines changed

4 files changed

+200
-0
lines changed

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
*.json
2+
__pycache__
3+
*.ipynb
4+
.ipynb_checkpoints
5+
*.db

DataBase.py

Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,60 @@
1+
import json
2+
from pandas.io.json import json_normalize
3+
import dataset
4+
import threading
5+
from numpy import int64
6+
7+
8+
def _expand_value(key, value, out):
    """Store *value* under *key* in *out*, replacing lists with scalar keys.

    A dict is converted recursively with ``serialize_list``. A list with one
    element is stored under *key* itself; a longer list is spread over
    ``key_0``, ``key_1``, ...; an empty list stores nothing. List elements
    are expanded with the same rules, so dicts and lists nested inside a
    list are converted too.
    """
    if isinstance(value, dict):
        out[key] = serialize_list(value)
    elif isinstance(value, list):
        if len(value) == 1:
            _expand_value(key, value[0], out)
        else:
            # Also covers the empty list: the loop body never runs, so the
            # key is dropped from the result.
            for index, element in enumerate(value):
                _expand_value('{}_{}'.format(key, index), element, out)
    else:
        out[key] = value


def serialize_list(j):
    """Return a copy of dict *j* in which no value is a list.

    Args:
        j: A dict, typically decoded JSON. It is not modified.

    Returns:
        A new dict. Nested dicts stay nested (with their own lists
        expanded); list values are replaced as described in
        ``_expand_value``.

    Note:
        A generated key such as ``tags_0`` is written into the same dict as
        the input's own keys, so if the input already has a key with that
        name, whichever is processed later overwrites the other.
    """
    flat = {}
    for key, value in j.items():
        _expand_value(key, value, flat)
    return flat
24+
25+
26+
def flatten(j: dict) -> dict:
    """Flatten a nested JSON-style dict into a single-level dict.

    Lists are first removed with ``serialize_list``; nested dicts are then
    collapsed by ``json_normalize`` into keys joined with ``'$'``.

    Args:
        j: The nested dict to flatten. It is not modified.

    Returns:
        A flat dict. NumPy ``int64`` values are converted to Python ints and
        boolean values are replaced by the strings ``'True'`` / ``'False'``.
    """
    # numpy is already a dependency of this module; only this extra name is
    # needed here, so it is imported locally.
    from numpy import bool_

    without_lists = serialize_list(j)
    record = json_normalize(without_lists, sep='$').iloc[0].to_dict()

    # Only existing keys are reassigned, so the dict does not change size
    # while it is being iterated.
    for key, value in record.items():
        if isinstance(value, (int64, bool_)):
            # Unwrap the NumPy scalar into the matching Python int / bool.
            value = value.item()

        # isinstance rather than ``== False`` / ``== True``: in Python
        # ``0 == False`` and ``1 == True``, so an equality test would also
        # turn the numbers 0 and 1 into strings.
        if isinstance(value, bool):
            value = 'True' if value else 'False'

        record[key] = value
    return record
39+
40+
41+
class database:
    """Wrapper around a single table of a ``dataset`` database connection."""

    def __init__(self, db_str, table_str, *args, **kwargs):
        """Connect to *db_str* and bind to *table_str*, creating it if absent.

        A newly created table uses ``pid`` as its primary key column. Extra
        positional and keyword arguments are accepted and ignored.
        """
        connection = dataset.connect(db_str)
        self.db = connection

        table_missing = table_str not in connection.tables
        if table_missing:
            connection.create_table(table_str, primary_id='pid')
        self.table = connection[table_str]

    def insert(self, j):
        """Flatten the response dict *j* and insert it as one row.

        Callers pass the full original response JSON; the formatting is done
        here. *j* must be exactly a ``dict``.
        """
        assert type(j) is dict

        # The insert runs synchronously in the calling thread.
        row = flatten(j)
        self.table.insert(row)

FeedlyClient.py

Lines changed: 115 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,115 @@
1+
import requests
2+
import json
3+
import sys
4+
import urllib
5+
import time
6+
import random
7+
from datetime import datetime
8+
9+
10+
class FeedlyClient:
    """Small Feedly Cloud API client that copies stream entries into a store.

    Credentials, the stream id and the ``last_fetch`` watermark are read from
    a JSON config file. The same file is rewritten whenever the access token
    is renewed or the watermark advances.
    """

    # Seconds to wait on a single HTTP call, so that a stalled connection
    # cannot block the fetch loop forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, file_str, db, *args, **kwargs):
        """Load the client settings from the JSON file at *file_str*.

        Args:
            file_str: Path of the JSON config file.
            db: Object with an ``insert(item)`` method; it receives every
                fetched entry.
            *args, **kwargs: Accepted and ignored.
        """
        self.file_str = file_str
        self.db = db
        self.prefix = 'https://cloud.feedly.com/v3'

        with open(file_str, 'r') as f:
            options = json.load(f)

        # Keys missing from the config file become None.
        self.client_id = options.get('client_id')
        self.client_secret = options.get('client_secret')
        self.access_token = options.get('access_token')
        self.refresh_token = options.get('refresh_token')
        self.id = options.get('id')
        self.last_fetch = options.get('last_fetch')
        self.my_stream_id1 = options.get('my_stream_id1')

    def tag_fetch(self):
        """Fetch the configured stream's entries newer than ``last_fetch``.

        Pages through ``/streams/contents`` oldest-first, passes every item
        to ``self.db.insert`` and tracks the largest ``actionTimestamp``
        seen. When at least one item was fetched, ``last_fetch`` is set to
        that timestamp plus one millisecond and saved to the config file.
        When nothing was fetched, the watermark and the config file are left
        untouched.

        Returns:
            The total number of entries fetched.
        """
        headers = self.auth_header()
        continuation = None
        total_fetched = 0

        while True:
            # requests leaves out parameters whose value is None, so the
            # first page carries no 'continuation' and a config without a
            # watermark sends no 'newerThan'.
            params = {'streamId': self.my_stream_id1,
                      'continuation': continuation,
                      'ranked': 'oldest',
                      'newerThan': self.last_fetch}

            res = self._get('/streams/contents',
                            params=params, headers=headers)

            items = res['items']
            total_fetched += len(items)
            current_latest = None
            for item in items:
                self.db.insert(item)

                stamp = item['actionTimestamp']
                # last_fetch is None when the config has no watermark yet.
                if self.last_fetch is None or stamp > self.last_fetch:
                    self.last_fetch = stamp
                if current_latest is None or stamp > current_latest:
                    current_latest = stamp

            print('{} entries fetched, current latest {}!'.format(
                len(items), self._format_ms(current_latest)))

            if 'continuation' not in res:
                break
            continuation = res['continuation']

            # Random pause between pages.
            time.sleep(random.randint(2, 4))

        if total_fetched:
            # Step one millisecond past the newest entry; presumably so that
            # entry is not returned again by the next run.
            self.last_fetch += 1
            self._config_update('last_fetch', self.last_fetch)

        print('total {} entries fetched! latest {}'.format(
            total_fetched, self._format_ms(self.last_fetch)))

        return total_fetched

    @staticmethod
    def _format_ms(timestamp_ms):
        """Return the local datetime for a millisecond timestamp, or None."""
        if timestamp_ms is None:
            return None
        return datetime.fromtimestamp(timestamp_ms / 1000)

    @staticmethod
    def _raise_for_status(response):
        """Raise for an HTTP error status, printing the body to stderr first.

        The raw body text is printed instead of the decoded JSON, so an
        error page that is not JSON cannot hide the original HTTP error.
        """
        try:
            response.raise_for_status()
        except requests.HTTPError:
            print(response.text, flush=True, file=sys.stderr)
            raise

    def _get(self, endpoint, params=None, headers=None, _retry=True):
        """GET ``prefix + endpoint`` and return the decoded JSON body.

        On a 401 response the access token is renewed and the request is
        repeated once. The new ``Authorization`` value is written into
        *headers* in place, so a caller that reuses the dict keeps using the
        renewed token.

        Args:
            endpoint: Path appended to ``self.prefix``.
            params: Optional query parameters.
            headers: Optional request headers.
            _retry: Internal flag; False on the repeated request so that a
                second 401 raises instead of recursing again.

        Raises:
            requests.HTTPError: For an error status, including a 401 that
                persists after the token was renewed.
        """
        path = self.prefix + endpoint
        r = requests.get(path, params=params, headers=headers,
                         timeout=self.REQUEST_TIMEOUT)

        if r.status_code == 401 and _retry:
            self._renew_access_token()
            if headers is None:
                headers = {}
            headers['Authorization'] = self.auth_header()['Authorization']
            return self._get(endpoint, params, headers, _retry=False)

        self._raise_for_status(r)
        return r.json()

    def auth_header(self):
        """Return the ``Authorization`` header for the current access token."""
        return {'Authorization': 'Bearer {}'.format(self.access_token)}

    def _renew_access_token(self):
        """Exchange the refresh token for a new access token and save it.

        Raises:
            requests.HTTPError: If the token endpoint answers with an error
                status.
        """
        data = {
            'refresh_token': self.refresh_token,
            'client_id': self.client_id,
            'client_secret': self.client_secret,
            'grant_type': 'refresh_token'
        }

        r = requests.post(self.prefix + '/auth/token', data=data,
                          timeout=self.REQUEST_TIMEOUT)
        self._raise_for_status(r)

        jr = r.json()
        self.access_token = jr['access_token']
        self._config_update('access_token', self.access_token)

        print('access_token is successfully updated',
              flush=True, file=sys.stderr)

    def _config_update(self, entry, updated_value):
        """Set ``entry`` to *updated_value* in the JSON config file.

        The file is re-read first, so keys other than *entry* keep the
        values currently on disk.
        """
        with open(self.file_str, 'r') as f:
            options = json.load(f)
        options[entry] = updated_value
        with open(self.file_str, 'w') as f:
            json.dump(options, f)

main.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,20 @@
1+
import traceback
2+
import json
3+
4+
import FeedlyClient
5+
import DataBase
6+
7+
8+
def main():
    """Open the local SQLite table and pull new Feedly entries into it."""
    storage = DataBase.database('sqlite:///feedly.db', 'feedly')
    client = FeedlyClient.FeedlyClient('config.json', storage)
    client.tag_fetch()
14+
15+
16+
if __name__ == "__main__":
    # Top-level boundary: print the traceback for any failure and exit with
    # a non-zero status so callers (shell, cron) can detect it.
    # ``except Exception`` rather than a bare ``except`` lets Ctrl-C
    # (KeyboardInterrupt) and SystemExit propagate normally.
    try:
        main()
    except Exception:
        traceback.print_exc()
        raise SystemExit(1)

0 commit comments

Comments
 (0)