22import requests
33import json
44import time
5+ import logging
6+ import hashlib
57from sqlite_utils .db import AlterError , ForeignKey
68
79
810def save_items (items , db ):
11+ count = 0
912 for item in items :
13+ count += 1
14+ logging .debug (f"Processing item { count } : { item .get ('item_id' , 'unknown' )} " )
1015 transform (item )
1116 authors = item .pop ("authors" , None )
1217 items_authors_to_save = []
1318 if authors :
1419 authors_to_save = []
1520 for details in authors .values ():
21+ # Handle both numeric and string author_ids
22+ author_id_raw = details ["author_id" ]
23+ try :
24+ # Try to use as integer (normal case)
25+ author_id = int (author_id_raw )
26+ author_name = details ["name" ]
27+ except ValueError :
28+ # String author_id - treat it as the name and generate unique ID
29+ author_name = author_id_raw
30+ # Generate deterministic integer ID from the string
31+ author_id = int (hashlib .md5 (author_id_raw .encode ()).hexdigest ()[:8 ], 16 )
32+
1633 authors_to_save .append (
1734 {
18- "author_id" : int ( details [ " author_id" ]) ,
19- "name" : details [ "name" ] ,
35+ "author_id" : author_id ,
36+ "name" : author_name ,
2037 "url" : details ["url" ],
2138 }
2239 )
2340 items_authors_to_save .append (
2441 {
25- "author_id" : int ( details [ " author_id" ]) ,
42+ "author_id" : author_id ,
2643 "item_id" : int (details ["item_id" ]),
2744 }
2845 )
@@ -64,36 +81,35 @@ def transform(item):
6481
6582
def ensure_fts(db):
    """Enable full-text search on the ``items`` table when it is missing.

    Nothing happens if an ``items_fts`` table already exists, or if the
    ``items`` table has not been created yet (there would be nothing to
    index).

    Args:
        db: Database object that provides ``table_names()`` and table
            lookup by name (``db["items"]``).
    """
    # Fetch the table list once and reuse it for both membership checks.
    tables = db.table_names()
    if "items_fts" not in tables and "items" in tables:
        db["items"].enable_fts(["resolved_title", "excerpt"], create_triggers=True)
6986
7087
def fetch_stats(auth):
    """Request account statistics from the Pocket ``/v3/stats`` endpoint.

    The credentials are sent as a form-encoded POST body.

    Args:
        auth: Mapping that contains ``pocket_consumer_key`` and
            ``pocket_access_token``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        KeyError: If either credential key is missing from ``auth``.
        requests.HTTPError: If the response carries an error status
            (raised by ``response.raise_for_status()``).
    """
    headers = {"Content-Type": "application/x-www-form-urlencoded; charset=UTF8"}
    data = {
        "consumer_key": auth["pocket_consumer_key"],
        "access_token": auth["pocket_access_token"],
    }
    response = requests.post(
        "https://getpocket.com/v3/stats", data=data, headers=headers
    )
    response.raise_for_status()
    return response.json()
8197
8298
8399class FetchItems :
84100 def __init__ (
85- self , auth , since = None , page_size = 500 , sleep = 2 , retry_sleep = 3 , record_since = None
101+ self , auth , start_offset = 0 , page_size = 50 , sleep = 2 , retry_sleep = 3
86102 ):
87103 self .auth = auth
88- self .since = since
104+ self .start_offset = start_offset
89105 self .page_size = page_size
90106 self .sleep = sleep
91107 self .retry_sleep = retry_sleep
92- self .record_since = record_since
93108
94109 def __iter__ (self ):
95- offset = 0
110+ offset = self . start_offset
96111 retries = 0
112+ logging .debug (f"Starting fetch with start_offset={ self .start_offset } , page_size={ self .page_size } " )
97113 while True :
98114 args = {
99115 "consumer_key" : self .auth ["pocket_consumer_key" ],
@@ -104,9 +120,11 @@ def __iter__(self):
104120 "count" : self .page_size ,
105121 "offset" : offset ,
106122 }
107- if self .since is not None :
108- args ["since" ] = self .since
109- response = requests .get ("https://getpocket.com/v3/get" , args )
123+
124+ logging .debug (f"Making API request to /v3/get with offset={ offset } " )
125+ headers = {"Content-Type" : "application/x-www-form-urlencoded; charset=UTF8" }
126+ response = requests .post ("https://getpocket.com/v3/get" , data = args , headers = headers )
127+ logging .debug (f"API response status: { response .status_code } " )
110128 if response .status_code == 503 and retries < 5 :
111129 print ("Got a 503, retrying..." )
112130 retries += 1
@@ -116,13 +134,36 @@ def __iter__(self):
116134 retries = 0
117135 response .raise_for_status ()
118136 page = response .json ()
119- items = list ((page ["list" ] or {}).values ())
120- next_since = page ["since" ]
121- if self .record_since and next_since :
122- self .record_since (next_since )
137+ logging .debug (f"API response keys: { list (page .keys ())} " )
138+
139+ # Check for API errors (error key present AND has a non-None value)
140+ error_msg = page .get ('error' )
141+ if error_msg is not None :
142+ logging .error (f"API returned error: { page } " )
143+
144+ # Handle payload too large by reducing page size
145+ if "413" in str (error_msg ) or "Payload Too Large" in str (error_msg ):
146+ if self .page_size > 10 :
147+ new_page_size = max (10 , self .page_size // 2 )
148+ logging .warning (f"Payload too large, reducing page size from { self .page_size } to { new_page_size } " )
149+ self .page_size = new_page_size
150+ continue # Retry with smaller page size
151+ else :
152+ raise Exception (f"Pocket API error: Even minimum page size (10) is too large: { error_msg } " )
153+
154+ raise Exception (f"Pocket API error: { error_msg } " )
155+
156+ items = list ((page .get ("list" ) or {}).values ())
157+ logging .debug (f"Found { len (items )} items in this page" )
158+
159+ next_since = page .get ("since" )
160+ logging .debug (f"Next since value: { next_since } " )
123161 if not items :
162+ logging .debug ("No more items found, breaking from loop" )
124163 break
164+ logging .debug (f"Yielding { len (items )} items" )
125165 yield from items
126166 offset += self .page_size
167+ logging .debug (f"Updated offset to { offset } " )
127168 if self .sleep :
128169 time .sleep (self .sleep )
0 commit comments