Skip to content

Commit 2b5f7cf

Browse files
digithree and claude committed
Add robust error handling for malformed Pocket data
- Add try-catch blocks around item and author processing
- Skip malformed author data (e.g. non-numeric author_id values)
- Continue processing even when individual items have errors
- Add silent parameter to suppress error warnings when needed
- Add comprehensive test coverage for:
  * Malformed author data (non-numeric IDs)
  * Mixed valid/invalid author data
  * Items with processing errors

Fixes crash when Pocket API returns malformed author data like 'author_id': 'Sandra E. Garcia' instead of numeric values.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 78e3559 commit 2b5f7cf

File tree

3 files changed

+133
-32
lines changed

3 files changed

+133
-32
lines changed

pocket_to_sqlite/cli.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,9 @@ def fetch(db_path, auth, all, silent, debug):
104104
if (all or last_since is None) and not silent:
105105
total_items = utils.fetch_stats(auth)["count_list"]
106106
with click.progressbar(fetch, length=total_items, show_pos=True) as bar:
107-
utils.save_items(bar, db)
107+
utils.save_items(bar, db, silent=silent)
108108
else:
109109
# No progress bar
110110
print("Fetching items since {}".format(last_since))
111-
utils.save_items(fetch, db)
111+
utils.save_items(fetch, db, silent=silent)
112112
utils.ensure_fts(db)

pocket_to_sqlite/utils.py

Lines changed: 52 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,39 +6,62 @@
66
from sqlite_utils.db import AlterError, ForeignKey
77

88

9-
def save_items(items, db):
9+
def save_items(items, db, silent=False):
1010
count = 0
1111
for item in items:
1212
count += 1
13-
logging.debug(f"Processing item {count}: {item.get('item_id', 'unknown')}")
14-
transform(item)
15-
authors = item.pop("authors", None)
16-
items_authors_to_save = []
17-
if authors:
18-
authors_to_save = []
19-
for details in authors.values():
20-
authors_to_save.append(
21-
{
22-
"author_id": int(details["author_id"]),
23-
"name": details["name"],
24-
"url": details["url"],
25-
}
26-
)
27-
items_authors_to_save.append(
28-
{
29-
"author_id": int(details["author_id"]),
30-
"item_id": int(details["item_id"]),
31-
}
13+
item_id = item.get('item_id', 'unknown')
14+
logging.debug(f"Processing item {count}: {item_id}")
15+
16+
try:
17+
transform(item)
18+
authors = item.pop("authors", None)
19+
items_authors_to_save = []
20+
if authors:
21+
authors_to_save = []
22+
for details in authors.values():
23+
try:
24+
# Safely convert author_id to int, skip if invalid
25+
author_id = int(details["author_id"])
26+
item_id_int = int(details["item_id"])
27+
28+
authors_to_save.append(
29+
{
30+
"author_id": author_id,
31+
"name": details["name"],
32+
"url": details["url"],
33+
}
34+
)
35+
items_authors_to_save.append(
36+
{
37+
"author_id": author_id,
38+
"item_id": item_id_int,
39+
}
40+
)
41+
except (ValueError, KeyError) as e:
42+
# Skip malformed author data
43+
if not silent:
44+
print(f"Warning: Skipping malformed author data for item {item_id}: {e}")
45+
logging.warning(f"Skipping malformed author data for item {item_id}: {e}")
46+
continue
47+
48+
if authors_to_save:
49+
db["authors"].insert_all(authors_to_save, pk="author_id", replace=True)
50+
51+
db["items"].insert(item, pk="item_id", alter=True, replace=True)
52+
if items_authors_to_save:
53+
db["items_authors"].insert_all(
54+
items_authors_to_save,
55+
pk=("author_id", "item_id"),
56+
foreign_keys=("author_id", "item_id"),
57+
replace=True,
3258
)
33-
db["authors"].insert_all(authors_to_save, pk="author_id", replace=True)
34-
db["items"].insert(item, pk="item_id", alter=True, replace=True)
35-
if items_authors_to_save:
36-
db["items_authors"].insert_all(
37-
items_authors_to_save,
38-
pk=("author_id", "item_id"),
39-
foreign_keys=("author_id", "item_id"),
40-
replace=True,
41-
)
59+
except Exception as e:
60+
# Skip entire item if there's any other error
61+
if not silent:
62+
print(f"Warning: Skipping item {item_id} due to error: {e}")
63+
logging.error(f"Skipping item {item_id} due to error: {e}")
64+
continue
4265

4366

4467
def transform(item):

tests/test_save_pocket.py

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def load():
1414
@pytest.fixture(scope="session")
1515
def converted():
1616
db = sqlite_utils.Database(":memory:")
17-
utils.save_items(load(), db)
17+
utils.save_items(load(), db, silent=True)
1818
utils.ensure_fts(db)
1919
return db
2020

@@ -220,6 +220,84 @@ def test_fetch_items_handles_error_none_success():
220220
assert items[0]["item_id"] == "1"
221221

222222

223+
def test_save_items_handles_malformed_author_data():
224+
"""Test that save_items handles malformed author data gracefully."""
225+
db = sqlite_utils.Database(":memory:")
226+
227+
# Create item with malformed author data
228+
malformed_item = {
229+
"item_id": "123",
230+
"title": "Test Item",
231+
"authors": {
232+
"1": {
233+
"author_id": "Sandra E. Garcia", # Invalid - should be numeric
234+
"name": "Sandra Garcia",
235+
"url": "http://example.com",
236+
"item_id": "123"
237+
}
238+
}
239+
}
240+
241+
# Should not crash, just skip the malformed author
242+
utils.save_items([malformed_item], db, silent=True)
243+
244+
# Item should be saved but no authors
245+
assert "items" in db.table_names()
246+
assert db["items"].count == 1
247+
assert "authors" not in db.table_names() # No valid authors to save
248+
249+
250+
def test_save_items_handles_mixed_author_data():
251+
"""Test that save_items handles mix of valid and invalid author data."""
252+
db = sqlite_utils.Database(":memory:")
253+
254+
# Create item with mixed author data
255+
mixed_item = {
256+
"item_id": "123",
257+
"title": "Test Item",
258+
"authors": {
259+
"1": {
260+
"author_id": "Sandra E. Garcia", # Invalid
261+
"name": "Sandra Garcia",
262+
"url": "http://example.com",
263+
"item_id": "123"
264+
},
265+
"2": {
266+
"author_id": "456", # Valid
267+
"name": "John Doe",
268+
"url": "http://example2.com",
269+
"item_id": "123"
270+
}
271+
}
272+
}
273+
274+
# Should save item and valid author, skip invalid author
275+
utils.save_items([mixed_item], db, silent=True)
276+
277+
# Should have item and one valid author
278+
assert db["items"].count == 1
279+
assert db["authors"].count == 1
280+
assert list(db["authors"].rows)[0]["author_id"] == 456
281+
282+
283+
def test_save_items_handles_completely_malformed_item():
284+
"""Test that save_items handles items missing required fields."""
285+
db = sqlite_utils.Database(":memory:")
286+
287+
# Create item missing item_id (will cause issues in transform)
288+
malformed_item = {"title": "No ID Item", "time_added": "not_a_number"}
289+
valid_item = {"item_id": "123", "title": "Valid Item"}
290+
291+
# Should handle errors gracefully and continue processing
292+
utils.save_items([malformed_item, valid_item], db, silent=True)
293+
294+
# Both items may be saved, but the valid one should definitely be there
295+
assert db["items"].count >= 1
296+
# Check that the valid item was saved correctly
297+
valid_items = [row for row in db["items"].rows if row.get("item_id") == 123]
298+
assert len(valid_items) == 1
299+
300+
223301
def test_ensure_fts_with_no_items_table():
224302
"""Test that ensure_fts handles case when items table doesn't exist."""
225303
db = sqlite_utils.Database(":memory:")

0 commit comments

Comments
 (0)