44import time
55import logging
66import hashlib
7+ import uuid
78from sqlite_utils .db import AlterError , ForeignKey
89from requests .exceptions import RequestException , Timeout , HTTPError
910
@@ -185,8 +186,9 @@ def __init__(self, auth, sleep=1, retry_sleep=3):
185186 self .auth = auth
186187 self .sleep = sleep
187188 self .retry_sleep = retry_sleep
188- self .base_url = auth .get ("karakeep_base_url" , "https://localhost:3000 " )
189+ self .base_url = auth .get ("karakeep_base_url" , "https://try.karakeep.app " )
189190 self .token = auth ["karakeep_token" ]
191+ self ._tags_cache = None
190192
191193 def create_bookmark (self , title , summary , url ):
192194 """
@@ -275,6 +277,110 @@ def create_bookmark(self, title, summary, url):
275277 raise Exception (f"Karakeep API request failed after 5 retries: { e } " )
276278
277279 raise Exception (f"Karakeep API request failed after 5 retries" )
280+
def get_all_tags(self):
    """
    Fetch all tags from Karakeep, caching the result on the client.

    The first successful call issues ``GET {base_url}/api/v1/tags`` and
    stores the parsed result in ``self._tags_cache``. Later calls return
    that same dict without contacting the server again. The cache is only
    populated once the whole response has been parsed, so a failed call
    leaves it untouched and the next call retries.

    Returns:
        Dict with tag names as keys and tag data as values

    Raises:
        Exception: If API call fails (the original error is chained as
            ``__cause__``)
    """
    if self._tags_cache is not None:
        return self._tags_cache

    headers = {
        'Accept': 'application/json',
        'Authorization': f'Bearer {self.token}',
    }

    # A configured base URL may carry a trailing slash or stray whitespace;
    # normalise it so the endpoint path is joined with exactly one slash.
    base_url = self.base_url.strip().rstrip("/")

    try:
        logging.debug("Fetching all tags from Karakeep")
        response = requests.get(
            f"{base_url}/api/v1/tags",
            headers=headers,
            timeout=30,
        )
        response.raise_for_status()
        data = response.json()

        # Convert to dict with tag names as keys for easy lookup
        tags_by_name = {tag["name"]: tag for tag in data.get("tags", [])}
    except Exception as e:
        # Keep the generic Exception type callers already catch, but chain
        # the cause so the underlying HTTP/JSON error stays in the traceback.
        raise Exception(f"Failed to fetch tags from Karakeep: {e}") from e

    self._tags_cache = tags_by_name
    logging.debug("Fetched %d tags from Karakeep", len(tags_by_name))
    return tags_by_name
318+
319+ def add_tags_to_bookmark (self , bookmark_id , tag_names ):
320+ """
321+ Add tags to a bookmark in Karakeep.
322+
323+ Args:
324+ bookmark_id: The ID of the bookmark to tag
325+ tag_names: List of tag names to add
326+
327+ Returns:
328+ Response data from Karakeep API
329+
330+ Raises:
331+ Exception: If API call fails
332+ """
333+ if not tag_names :
334+ return {"attached" : []}
335+
336+ # Get existing tags to check which ones already exist
337+ existing_tags = self .get_all_tags ()
338+
339+ # Prepare tags payload
340+ tags_payload = []
341+ for tag_name in tag_names :
342+ if tag_name in existing_tags :
343+ # Use existing tag
344+ tag_data = existing_tags [tag_name ]
345+ tags_payload .append ({
346+ "tagId" : tag_data ["id" ],
347+ "tagName" : tag_data ["name" ]
348+ })
349+ else :
350+ # Create new tag with random ID
351+ tag_id = str (uuid .uuid4 ()).replace ("-" , "" )[:16 ] # 16 char random ID
352+ tags_payload .append ({
353+ "tagId" : tag_id ,
354+ "tagName" : tag_name
355+ })
356+ # Add to cache so subsequent uses in same session will find it
357+ existing_tags [tag_name ] = {"id" : tag_id , "name" : tag_name }
358+
359+ payload = {"tags" : tags_payload }
360+
361+ headers = {
362+ 'Content-Type' : 'application/json' ,
363+ 'Accept' : 'application/json' ,
364+ 'Authorization' : f'Bearer { self .token } '
365+ }
366+
367+ try :
368+ logging .debug (f"Adding { len (tags_payload )} tags to bookmark { bookmark_id } " )
369+ response = requests .post (
370+ f"{ self .base_url } /api/v1/bookmarks/{ bookmark_id } /tags" ,
371+ json = payload ,
372+ headers = headers ,
373+ timeout = 30
374+ )
375+
376+ response .raise_for_status ()
377+ result = response .json ()
378+
379+ logging .debug (f"Successfully added tags to bookmark { bookmark_id } : { result .get ('attached' , [])} " )
380+ return result
381+
382+ except Exception as e :
383+ raise Exception (f"Failed to add tags to bookmark { bookmark_id } : { e } " )
278384
279385
280386def export_items_to_karakeep (db , auth , limit = None , offset = 0 , filter_status = None , filter_favorite = False ):
@@ -294,6 +400,13 @@ def export_items_to_karakeep(db, auth, limit=None, offset=0, filter_status=None,
294400 """
295401 client = KarakeepClient (auth )
296402
403+ # Pre-fetch tags from Karakeep to enable tag matching
404+ try :
405+ client .get_all_tags ()
406+ logging .debug ("Successfully pre-fetched tags from Karakeep" )
407+ except Exception as e :
408+ logging .warning (f"Failed to pre-fetch tags from Karakeep: { e } " )
409+
297410 # Build query conditions
298411 conditions = []
299412 params = []
@@ -307,14 +420,41 @@ def export_items_to_karakeep(db, auth, limit=None, offset=0, filter_status=None,
307420
308421 where_clause = " WHERE " + " AND " .join (conditions ) if conditions else ""
309422
310- # Build SQL query
311- sql = f"""
312- SELECT item_id, resolved_title, given_title, resolved_url, given_url, excerpt
313- FROM items
314- { where_clause }
315- ORDER BY item_id
316- LIMIT ? OFFSET ?
317- """
423+ # Check if tags column exists
424+ columns = [col [1 ] for col in db .execute ("PRAGMA table_info(items)" )]
425+ has_tags_column = "tags" in columns
426+
427+ # Build query with or without tags column
428+ if has_tags_column :
429+ sql = f"""
430+ SELECT
431+ item_id,
432+ resolved_title,
433+ given_title,
434+ resolved_url,
435+ given_url,
436+ excerpt,
437+ tags
438+ FROM items
439+ { where_clause }
440+ ORDER BY item_id
441+ LIMIT ? OFFSET ?
442+ """
443+ else :
444+ sql = f"""
445+ SELECT
446+ item_id,
447+ resolved_title,
448+ given_title,
449+ resolved_url,
450+ given_url,
451+ excerpt,
452+ NULL as tags
453+ FROM items
454+ { where_clause }
455+ ORDER BY item_id
456+ LIMIT ? OFFSET ?
457+ """
318458
319459 # Add limit and offset to params
320460 final_limit = limit if limit is not None else - 1 # SQLite uses -1 for no limit
@@ -331,7 +471,7 @@ def export_items_to_karakeep(db, auth, limit=None, offset=0, filter_status=None,
331471
332472 # Convert row to dict for easier access
333473 row_dict = dict (row ) if hasattr (row , 'keys' ) else dict (zip ([
334- 'item_id' , 'resolved_title' , 'given_title' , 'resolved_url' , 'given_url' , 'excerpt'
474+ 'item_id' , 'resolved_title' , 'given_title' , 'resolved_url' , 'given_url' , 'excerpt' , 'tags'
335475 ], row ))
336476
337477 # Map Pocket item to Karakeep bookmark
@@ -354,12 +494,55 @@ def export_items_to_karakeep(db, auth, limit=None, offset=0, filter_status=None,
354494 success_count += 1
355495 logging .debug (f"Successfully exported item { row_dict ['item_id' ]} " )
356496
497+ # Handle tags if they exist
498+ tags_result = None
499+ bookmark_id = result .get ("id" )
500+ tags_data = row_dict .get ("tags" )
501+
502+ logging .debug (f"Item { row_dict ['item_id' ]} : bookmark_id={ bookmark_id } , tags_data={ tags_data } " )
503+
504+ if bookmark_id and tags_data :
505+ # Parse tags from Pocket format: {"programming": {"tag": "programming", "item_id": "131375573"}, ...}
506+ tag_names = []
507+ try :
508+ if tags_data .startswith ('{' ):
509+ # JSON format from Pocket API
510+ tags_obj = json .loads (tags_data )
511+ # Extract tag names from the nested structure
512+ tag_names = [tag_info .get ("tag" , tag_key ) for tag_key , tag_info in tags_obj .items () if isinstance (tag_info , dict )]
513+ # Fallback to keys if tag field not found
514+ if not tag_names :
515+ tag_names = list (tags_obj .keys ())
516+ else :
517+ # Comma-separated format (fallback)
518+ tag_names = [tag .strip () for tag in tags_data .split ("," ) if tag .strip ()]
519+ except (json .JSONDecodeError , AttributeError , TypeError ):
520+ # Fallback to comma-separated
521+ tag_names = [tag .strip () for tag in str (tags_data ).split ("," ) if tag .strip ()]
522+
523+ logging .debug (f"Item { row_dict ['item_id' ]} : parsed { len (tag_names )} tags: { tag_names } " )
524+
525+ if tag_names :
526+ try :
527+ tags_result = client .add_tags_to_bookmark (bookmark_id , tag_names )
528+ logging .debug (f"Successfully added { len (tag_names )} tags to bookmark { bookmark_id } " )
529+ except Exception as e :
530+ logging .warning (f"Failed to add tags to bookmark { bookmark_id } : { e } " )
531+ else :
532+ logging .debug (f"Item { row_dict ['item_id' ]} : no valid tags found after parsing" )
533+ else :
534+ if not bookmark_id :
535+ logging .debug (f"Item { row_dict ['item_id' ]} : no bookmark_id from Karakeep response" )
536+ if not tags_data :
537+ logging .debug (f"Item { row_dict ['item_id' ]} : no tags data in database" )
538+
357539 yield {
358540 "item_id" : row_dict ["item_id" ],
359541 "status" : "success" ,
360542 "title" : title ,
361543 "url" : url ,
362- "karakeep_response" : result
544+ "karakeep_response" : result ,
545+ "tags_response" : tags_result
363546 }
364547
365548 except Exception as e :
@@ -402,14 +585,41 @@ def preview_export_items(db, limit=None, offset=0, filter_status=None, filter_fa
402585
403586 where_clause = " WHERE " + " AND " .join (conditions ) if conditions else ""
404587
405- # Build SQL query
406- sql = f"""
407- SELECT item_id, resolved_title, given_title, resolved_url, given_url, excerpt
408- FROM items
409- { where_clause }
410- ORDER BY item_id
411- LIMIT ? OFFSET ?
412- """
588+ # Check if tags column exists
589+ columns = [col [1 ] for col in db .execute ("PRAGMA table_info(items)" )]
590+ has_tags_column = "tags" in columns
591+
592+ # Build query with or without tags column
593+ if has_tags_column :
594+ sql = f"""
595+ SELECT
596+ item_id,
597+ resolved_title,
598+ given_title,
599+ resolved_url,
600+ given_url,
601+ excerpt,
602+ tags
603+ FROM items
604+ { where_clause }
605+ ORDER BY item_id
606+ LIMIT ? OFFSET ?
607+ """
608+ else :
609+ sql = f"""
610+ SELECT
611+ item_id,
612+ resolved_title,
613+ given_title,
614+ resolved_url,
615+ given_url,
616+ excerpt,
617+ NULL as tags
618+ FROM items
619+ { where_clause }
620+ ORDER BY item_id
621+ LIMIT ? OFFSET ?
622+ """
413623
414624 # Add limit and offset to params
415625 final_limit = limit if limit is not None else - 1 # SQLite uses -1 for no limit
@@ -418,7 +628,7 @@ def preview_export_items(db, limit=None, offset=0, filter_status=None, filter_fa
418628 for row in db .execute (sql , params ):
419629 # Convert row to dict for easier access
420630 row_dict = dict (row ) if hasattr (row , 'keys' ) else dict (zip ([
421- 'item_id' , 'resolved_title' , 'given_title' , 'resolved_url' , 'given_url' , 'excerpt'
631+ 'item_id' , 'resolved_title' , 'given_title' , 'resolved_url' , 'given_url' , 'excerpt' , 'tags'
422632 ], row ))
423633
424634 # Map Pocket item to Karakeep bookmark
@@ -433,9 +643,29 @@ def preview_export_items(db, limit=None, offset=0, filter_status=None, filter_fa
433643 "reason" : "no_url"
434644 }
435645 else :
646+ tags_data = row_dict .get ("tags" )
647+ tag_names = []
648+ if tags_data :
649+ try :
650+ if tags_data .startswith ('{' ):
651+ # JSON format from Pocket API: {"programming": {"tag": "programming", "item_id": "131375573"}, ...}
652+ tags_obj = json .loads (tags_data )
653+ # Extract tag names from the nested structure
654+ tag_names = [tag_info .get ("tag" , tag_key ) for tag_key , tag_info in tags_obj .items () if isinstance (tag_info , dict )]
655+ # Fallback to keys if tag field not found
656+ if not tag_names :
657+ tag_names = list (tags_obj .keys ())
658+ else :
659+ # Comma-separated format (fallback)
660+ tag_names = [tag .strip () for tag in tags_data .split ("," ) if tag .strip ()]
661+ except (json .JSONDecodeError , AttributeError , TypeError ):
662+ # Fallback to comma-separated
663+ tag_names = [tag .strip () for tag in str (tags_data ).split ("," ) if tag .strip ()]
664+
436665 yield {
437666 "item_id" : row_dict ["item_id" ],
438667 "status" : "preview" ,
439668 "title" : title ,
440- "url" : url
669+ "url" : url ,
670+ "tags" : tag_names
441671 }
0 commit comments