|
import json
import os
import random
import sys
import time
import urllib
from datetime import datetime

import requests
| 8 | + |
| 9 | + |
class FeedlyClient:
    """Small client for the Feedly cloud API (``https://cloud.feedly.com/v3``).

    Credentials and the fetch cursor are read from a JSON config file at
    construction time; the refreshed access token and the updated cursor are
    written back to that same file.
    """

    # Seconds to wait on any single HTTP request before giving up, so a
    # stalled connection cannot hang a run forever.
    timeout = 30

    def __init__(self, file_str, db, *args, **kwargs):
        """Load client settings from the JSON file at ``file_str``.

        Args:
            file_str: Path of the JSON config file. It is read here and
                rewritten by ``_config_update``.
            db: Object exposing ``insert(item)``; called once per fetched
                entry by ``tag_fetch``.
            *args, **kwargs: Accepted and ignored.
        """
        self.file_str = file_str
        self.db = db
        self.prefix = 'https://cloud.feedly.com/v3'

        with open(file_str, 'r') as f:
            options = json.load(f)

        # Every key is optional in the file; missing ones become None.
        self.client_id = options.get('client_id')
        self.client_secret = options.get('client_secret')
        self.access_token = options.get('access_token')
        self.refresh_token = options.get('refresh_token')
        self.id = options.get('id')
        self.last_fetch = options.get('last_fetch')
        self.my_stream_id1 = options.get('my_stream_id1')

    def tag_fetch(self):
        """Fetch every entry of the configured stream newer than ``last_fetch``.

        Pages through ``/streams/contents`` oldest-first, passes each item to
        ``self.db.insert`` and, when at least one item was fetched, advances
        ``self.last_fetch`` to one past the largest ``actionTimestamp`` seen
        and persists it to the config file.

        Returns:
            The total number of items fetched in this run.
        """
        headers = self.auth_header()
        # Keep the query window fixed for the whole run so that every
        # continuation token is sent with the parameters that produced it.
        newer_than = self.last_fetch
        # May be None on a first run (no 'last_fetch' in the config yet).
        latest_seen = self.last_fetch
        continuation = None
        total_fetched = 0

        while True:
            params = {'streamId': self.my_stream_id1,
                      'continuation': continuation,
                      'ranked': 'oldest',
                      'newerThan': newer_than}

            res = self._get('/streams/contents',
                            params=params, headers=headers)

            items = res.get('items', [])
            total_fetched += len(items)

            page_latest = None
            for item in items:
                self.db.insert(item)
                stamp = item.get('actionTimestamp')
                if stamp is not None and (page_latest is None
                                          or stamp > page_latest):
                    page_latest = stamp

            if page_latest is not None and (latest_seen is None
                                            or page_latest > latest_seen):
                latest_seen = page_latest

            print('{} entries fetched, current latest {}!'.format(
                len(items), self._format_millis(page_latest)))

            continuation = res.get('continuation')
            if not continuation:
                break

            # Randomized pause between pages.
            time.sleep(random.randint(2, 4))

        if total_fetched and latest_seen is not None:
            # +1 ms so the next run starts strictly after the newest item.
            self.last_fetch = latest_seen + 1
            self._config_update('last_fetch', self.last_fetch)

        print('total {} entries fetched! latest {}'.format(
            total_fetched, self._format_millis(self.last_fetch)))

        return total_fetched

    @staticmethod
    def _format_millis(millis):
        """Render a millisecond timestamp as a local datetime, or 'n/a' for None."""
        if millis is None:
            return 'n/a'
        return datetime.fromtimestamp(millis / 1000)

    @staticmethod
    def _raise_for_status(response):
        """Raise for an HTTP error status, echoing the response body to stderr first."""
        try:
            response.raise_for_status()
        except requests.HTTPError:
            try:
                detail = response.json()
            except ValueError:
                # Error body is not JSON; fall back to the raw text so the
                # HTTP error itself is not masked by a decoding failure.
                detail = response.text
            print(detail, flush=True, file=sys.stderr)
            raise

    def _get(self, endpoint, params=None, headers=None, _retry=True):
        """GET ``self.prefix + endpoint`` and return the decoded JSON body.

        On a 401 response the access token is renewed and the request is
        repeated exactly once. The ``Authorization`` entry of ``headers`` is
        updated in place so a caller that reuses the dict picks up the new
        token.

        Args:
            endpoint: Path appended to ``self.prefix``.
            params: Optional query parameters.
            headers: Optional request headers.
            _retry: Internal flag; False on the single retry after a token
                refresh so a persistent 401 cannot recurse forever.

        Raises:
            requests.HTTPError: For any error status that remains after the
                optional retry.
        """
        path = self.prefix + endpoint
        r = requests.get(path, params=params, headers=headers,
                         timeout=self.timeout)

        if r.status_code == 401 and _retry:
            self._renew_access_token()
            if headers is None:
                headers = {}
            headers['Authorization'] = self.auth_header()['Authorization']
            return self._get(endpoint, params, headers, _retry=False)

        self._raise_for_status(r)
        return r.json()

    def auth_header(self):
        """Return a fresh headers dict carrying the current bearer token."""
        return {'Authorization': 'Bearer {}'.format(self.access_token)}

    def _renew_access_token(self):
        """Exchange the refresh token for a new access token and persist it.

        Raises:
            requests.HTTPError: If the token endpoint answers with an error
                status.
        """
        data = {
            'refresh_token': self.refresh_token,
            'client_id': self.client_id,
            'client_secret': self.client_secret,
            'grant_type': 'refresh_token'
        }

        r = requests.post(self.prefix + '/auth/token', data=data,
                          timeout=self.timeout)
        self._raise_for_status(r)

        jr = r.json()
        self.access_token = jr['access_token']
        self._config_update('access_token', self.access_token)

        print('access_token is successfully updated',
              flush=True, file=sys.stderr)

    def _config_update(self, entry, updated_value):
        """Set ``entry`` to ``updated_value`` in the JSON config file.

        The file is re-read first so keys changed since construction are
        kept. The new content is serialized before anything is written and
        then swapped in with ``os.replace``, so a serialization error or a
        crash mid-write cannot truncate the file holding the credentials.
        """
        with open(self.file_str, 'r') as f:
            options = json.load(f)
        options[entry] = updated_value

        # Serialize up front: a non-serializable value raises here, before
        # any file has been opened for writing.
        payload = json.dumps(options)

        tmp_path = self.file_str + '.tmp'
        with open(tmp_path, 'w') as f:
            f.write(payload)
        os.replace(tmp_path, self.file_str)
0 commit comments