Skip to content

Commit 56d0d86

Browse files
authored
Merge pull request #4 from digithree/add-karakeep-tagging
Add tagging support to Karakeep export
2 parents 6efc935 + 8174b21 commit 56d0d86

File tree

1 file changed

+251
-21
lines changed

1 file changed

+251
-21
lines changed

pocket_to_sqlite/utils.py

Lines changed: 251 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
import time
55
import logging
66
import hashlib
7+
import uuid
78
from sqlite_utils.db import AlterError, ForeignKey
89
from requests.exceptions import RequestException, Timeout, HTTPError
910

@@ -185,8 +186,9 @@ def __init__(self, auth, sleep=1, retry_sleep=3):
185186
self.auth = auth
186187
self.sleep = sleep
187188
self.retry_sleep = retry_sleep
188-
self.base_url = auth.get("karakeep_base_url", "https://localhost:3000")
189+
self.base_url = auth.get("karakeep_base_url", "https://try.karakeep.app")
189190
self.token = auth["karakeep_token"]
191+
self._tags_cache = None
190192

191193
def create_bookmark(self, title, summary, url):
192194
"""
@@ -275,6 +277,110 @@ def create_bookmark(self, title, summary, url):
275277
raise Exception(f"Karakeep API request failed after 5 retries: {e}")
276278

277279
raise Exception(f"Karakeep API request failed after 5 retries")
280+
281+
def get_all_tags(self):
    """
    Fetch all tags from Karakeep, caching the result on the client.

    The first successful call issues ``GET {base_url}/api/v1/tags`` and
    stores the parsed result in ``self._tags_cache``; later calls return
    that cached dict without another request. The cache is only assigned
    after the whole fetch-and-parse step has succeeded.

    Returns:
        Dict with tag names as keys and tag data as values

    Raises:
        Exception: If the API call fails or the response cannot be
            parsed. The underlying error is attached as ``__cause__``.
    """
    if self._tags_cache is not None:
        return self._tags_cache

    headers = {
        'Accept': 'application/json',
        'Authorization': f'Bearer {self.token}'
    }

    try:
        logging.debug("Fetching all tags from Karakeep")
        response = requests.get(
            f"{self.base_url}/api/v1/tags",
            headers=headers,
            timeout=30
        )

        response.raise_for_status()
        data = response.json()

        # Convert to dict with tag names as keys for easy lookup
        tags_by_name = {tag["name"]: tag for tag in data.get("tags", [])}

    except Exception as e:
        # Chain explicitly so the root cause (HTTP error, bad JSON, missing
        # "name" key, ...) stays visible in the traceback.
        raise Exception(f"Failed to fetch tags from Karakeep: {e}") from e

    self._tags_cache = tags_by_name
    logging.debug(f"Fetched {len(self._tags_cache)} tags from Karakeep")
    return self._tags_cache
318+
319+
def add_tags_to_bookmark(self, bookmark_id, tag_names):
320+
"""
321+
Add tags to a bookmark in Karakeep.
322+
323+
Args:
324+
bookmark_id: The ID of the bookmark to tag
325+
tag_names: List of tag names to add
326+
327+
Returns:
328+
Response data from Karakeep API
329+
330+
Raises:
331+
Exception: If API call fails
332+
"""
333+
if not tag_names:
334+
return {"attached": []}
335+
336+
# Get existing tags to check which ones already exist
337+
existing_tags = self.get_all_tags()
338+
339+
# Prepare tags payload
340+
tags_payload = []
341+
for tag_name in tag_names:
342+
if tag_name in existing_tags:
343+
# Use existing tag
344+
tag_data = existing_tags[tag_name]
345+
tags_payload.append({
346+
"tagId": tag_data["id"],
347+
"tagName": tag_data["name"]
348+
})
349+
else:
350+
# Create new tag with random ID
351+
tag_id = str(uuid.uuid4()).replace("-", "")[:16] # 16 char random ID
352+
tags_payload.append({
353+
"tagId": tag_id,
354+
"tagName": tag_name
355+
})
356+
# Add to cache so subsequent uses in same session will find it
357+
existing_tags[tag_name] = {"id": tag_id, "name": tag_name}
358+
359+
payload = {"tags": tags_payload}
360+
361+
headers = {
362+
'Content-Type': 'application/json',
363+
'Accept': 'application/json',
364+
'Authorization': f'Bearer {self.token}'
365+
}
366+
367+
try:
368+
logging.debug(f"Adding {len(tags_payload)} tags to bookmark {bookmark_id}")
369+
response = requests.post(
370+
f"{self.base_url}/api/v1/bookmarks/{bookmark_id}/tags",
371+
json=payload,
372+
headers=headers,
373+
timeout=30
374+
)
375+
376+
response.raise_for_status()
377+
result = response.json()
378+
379+
logging.debug(f"Successfully added tags to bookmark {bookmark_id}: {result.get('attached', [])}")
380+
return result
381+
382+
except Exception as e:
383+
raise Exception(f"Failed to add tags to bookmark {bookmark_id}: {e}")
278384

279385

280386
def export_items_to_karakeep(db, auth, limit=None, offset=0, filter_status=None, filter_favorite=False):
@@ -294,6 +400,13 @@ def export_items_to_karakeep(db, auth, limit=None, offset=0, filter_status=None,
294400
"""
295401
client = KarakeepClient(auth)
296402

403+
# Pre-fetch tags from Karakeep to enable tag matching
404+
try:
405+
client.get_all_tags()
406+
logging.debug("Successfully pre-fetched tags from Karakeep")
407+
except Exception as e:
408+
logging.warning(f"Failed to pre-fetch tags from Karakeep: {e}")
409+
297410
# Build query conditions
298411
conditions = []
299412
params = []
@@ -307,14 +420,41 @@ def export_items_to_karakeep(db, auth, limit=None, offset=0, filter_status=None,
307420

308421
where_clause = " WHERE " + " AND ".join(conditions) if conditions else ""
309422

310-
# Build SQL query
311-
sql = f"""
312-
SELECT item_id, resolved_title, given_title, resolved_url, given_url, excerpt
313-
FROM items
314-
{where_clause}
315-
ORDER BY item_id
316-
LIMIT ? OFFSET ?
317-
"""
423+
# Check if tags column exists
424+
columns = [col[1] for col in db.execute("PRAGMA table_info(items)")]
425+
has_tags_column = "tags" in columns
426+
427+
# Build query with or without tags column
428+
if has_tags_column:
429+
sql = f"""
430+
SELECT
431+
item_id,
432+
resolved_title,
433+
given_title,
434+
resolved_url,
435+
given_url,
436+
excerpt,
437+
tags
438+
FROM items
439+
{where_clause}
440+
ORDER BY item_id
441+
LIMIT ? OFFSET ?
442+
"""
443+
else:
444+
sql = f"""
445+
SELECT
446+
item_id,
447+
resolved_title,
448+
given_title,
449+
resolved_url,
450+
given_url,
451+
excerpt,
452+
NULL as tags
453+
FROM items
454+
{where_clause}
455+
ORDER BY item_id
456+
LIMIT ? OFFSET ?
457+
"""
318458

319459
# Add limit and offset to params
320460
final_limit = limit if limit is not None else -1 # SQLite uses -1 for no limit
@@ -331,7 +471,7 @@ def export_items_to_karakeep(db, auth, limit=None, offset=0, filter_status=None,
331471

332472
# Convert row to dict for easier access
333473
row_dict = dict(row) if hasattr(row, 'keys') else dict(zip([
334-
'item_id', 'resolved_title', 'given_title', 'resolved_url', 'given_url', 'excerpt'
474+
'item_id', 'resolved_title', 'given_title', 'resolved_url', 'given_url', 'excerpt', 'tags'
335475
], row))
336476

337477
# Map Pocket item to Karakeep bookmark
@@ -354,12 +494,55 @@ def export_items_to_karakeep(db, auth, limit=None, offset=0, filter_status=None,
354494
success_count += 1
355495
logging.debug(f"Successfully exported item {row_dict['item_id']}")
356496

497+
# Handle tags if they exist
498+
tags_result = None
499+
bookmark_id = result.get("id")
500+
tags_data = row_dict.get("tags")
501+
502+
logging.debug(f"Item {row_dict['item_id']}: bookmark_id={bookmark_id}, tags_data={tags_data}")
503+
504+
if bookmark_id and tags_data:
505+
# Parse tags from Pocket format: {"programming": {"tag": "programming", "item_id": "131375573"}, ...}
506+
tag_names = []
507+
try:
508+
if tags_data.startswith('{'):
509+
# JSON format from Pocket API
510+
tags_obj = json.loads(tags_data)
511+
# Extract tag names from the nested structure
512+
tag_names = [tag_info.get("tag", tag_key) for tag_key, tag_info in tags_obj.items() if isinstance(tag_info, dict)]
513+
# Fallback to keys if tag field not found
514+
if not tag_names:
515+
tag_names = list(tags_obj.keys())
516+
else:
517+
# Comma-separated format (fallback)
518+
tag_names = [tag.strip() for tag in tags_data.split(",") if tag.strip()]
519+
except (json.JSONDecodeError, AttributeError, TypeError):
520+
# Fallback to comma-separated
521+
tag_names = [tag.strip() for tag in str(tags_data).split(",") if tag.strip()]
522+
523+
logging.debug(f"Item {row_dict['item_id']}: parsed {len(tag_names)} tags: {tag_names}")
524+
525+
if tag_names:
526+
try:
527+
tags_result = client.add_tags_to_bookmark(bookmark_id, tag_names)
528+
logging.debug(f"Successfully added {len(tag_names)} tags to bookmark {bookmark_id}")
529+
except Exception as e:
530+
logging.warning(f"Failed to add tags to bookmark {bookmark_id}: {e}")
531+
else:
532+
logging.debug(f"Item {row_dict['item_id']}: no valid tags found after parsing")
533+
else:
534+
if not bookmark_id:
535+
logging.debug(f"Item {row_dict['item_id']}: no bookmark_id from Karakeep response")
536+
if not tags_data:
537+
logging.debug(f"Item {row_dict['item_id']}: no tags data in database")
538+
357539
yield {
358540
"item_id": row_dict["item_id"],
359541
"status": "success",
360542
"title": title,
361543
"url": url,
362-
"karakeep_response": result
544+
"karakeep_response": result,
545+
"tags_response": tags_result
363546
}
364547

365548
except Exception as e:
@@ -402,14 +585,41 @@ def preview_export_items(db, limit=None, offset=0, filter_status=None, filter_fa
402585

403586
where_clause = " WHERE " + " AND ".join(conditions) if conditions else ""
404587

405-
# Build SQL query
406-
sql = f"""
407-
SELECT item_id, resolved_title, given_title, resolved_url, given_url, excerpt
408-
FROM items
409-
{where_clause}
410-
ORDER BY item_id
411-
LIMIT ? OFFSET ?
412-
"""
588+
# Check if tags column exists
589+
columns = [col[1] for col in db.execute("PRAGMA table_info(items)")]
590+
has_tags_column = "tags" in columns
591+
592+
# Build query with or without tags column
593+
if has_tags_column:
594+
sql = f"""
595+
SELECT
596+
item_id,
597+
resolved_title,
598+
given_title,
599+
resolved_url,
600+
given_url,
601+
excerpt,
602+
tags
603+
FROM items
604+
{where_clause}
605+
ORDER BY item_id
606+
LIMIT ? OFFSET ?
607+
"""
608+
else:
609+
sql = f"""
610+
SELECT
611+
item_id,
612+
resolved_title,
613+
given_title,
614+
resolved_url,
615+
given_url,
616+
excerpt,
617+
NULL as tags
618+
FROM items
619+
{where_clause}
620+
ORDER BY item_id
621+
LIMIT ? OFFSET ?
622+
"""
413623

414624
# Add limit and offset to params
415625
final_limit = limit if limit is not None else -1 # SQLite uses -1 for no limit
@@ -418,7 +628,7 @@ def preview_export_items(db, limit=None, offset=0, filter_status=None, filter_fa
418628
for row in db.execute(sql, params):
419629
# Convert row to dict for easier access
420630
row_dict = dict(row) if hasattr(row, 'keys') else dict(zip([
421-
'item_id', 'resolved_title', 'given_title', 'resolved_url', 'given_url', 'excerpt'
631+
'item_id', 'resolved_title', 'given_title', 'resolved_url', 'given_url', 'excerpt', 'tags'
422632
], row))
423633

424634
# Map Pocket item to Karakeep bookmark
@@ -433,9 +643,29 @@ def preview_export_items(db, limit=None, offset=0, filter_status=None, filter_fa
433643
"reason": "no_url"
434644
}
435645
else:
646+
tags_data = row_dict.get("tags")
647+
tag_names = []
648+
if tags_data:
649+
try:
650+
if tags_data.startswith('{'):
651+
# JSON format from Pocket API: {"programming": {"tag": "programming", "item_id": "131375573"}, ...}
652+
tags_obj = json.loads(tags_data)
653+
# Extract tag names from the nested structure
654+
tag_names = [tag_info.get("tag", tag_key) for tag_key, tag_info in tags_obj.items() if isinstance(tag_info, dict)]
655+
# Fallback to keys if tag field not found
656+
if not tag_names:
657+
tag_names = list(tags_obj.keys())
658+
else:
659+
# Comma-separated format (fallback)
660+
tag_names = [tag.strip() for tag in tags_data.split(",") if tag.strip()]
661+
except (json.JSONDecodeError, AttributeError, TypeError):
662+
# Fallback to comma-separated
663+
tag_names = [tag.strip() for tag in str(tags_data).split(",") if tag.strip()]
664+
436665
yield {
437666
"item_id": row_dict["item_id"],
438667
"status": "preview",
439668
"title": title,
440-
"url": url
669+
"url": url,
670+
"tags": tag_names
441671
}

0 commit comments

Comments
 (0)