Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
8730f9e
Update dependencies to latest versions
digithree May 24, 2025
7b18b4b
Merge pull request #1 from digithree/update-dependencies
digithree May 24, 2025
fd5f2ac
Update .gitignore to ignore auth.json
digithree May 24, 2025
299103a
Fix KeyError when API response missing 'list' or 'since' keys
digithree May 24, 2025
1d2170e
Fix OperationalError when items table doesn't exist in ensure_fts
digithree May 24, 2025
3e2907f
Fix Pocket API calls - use POST method for all endpoints
digithree May 24, 2025
c965bf1
Add debug logging to diagnose API fetch issues
digithree May 24, 2025
1d530dd
Fix Pocket API request format and add proper error handling
digithree May 24, 2025
877b5ee
Fix 413 Payload Too Large error with automatic page size reduction
digithree May 24, 2025
78e3559
Fix error detection to check error value, not just key presence
digithree May 24, 2025
f2a6519
Support both numeric and string author ID schemas
digithree May 24, 2025
4277c4d
Fix incremental fetching using offset-based approach instead of times…
digithree May 24, 2025
ee1e964
Merge pull request #2 from digithree/fix-keyerror-list-missing
digithree May 24, 2025
71668a6
Add Karakeep export command with comprehensive functionality
digithree May 24, 2025
b01e342
Update Karakeep client to handle actual API response formats
digithree May 24, 2025
53a3115
Add comprehensive export command documentation to README
digithree May 24, 2025
6efc935
Merge pull request #3 from digithree/add-karakeep-export-command
digithree May 25, 2025
aecfc0c
Add tagging support to Karakeep export functionality
digithree May 25, 2025
25c2131
Update tag parsing to handle correct Pocket API format
digithree May 25, 2025
8174b21
Add detailed tag processing debug logging
digithree May 25, 2025
56d0d86
Merge pull request #4 from digithree/add-karakeep-tagging
digithree May 25, 2025
0c1f284
Update pocket_to_sqlite/cli.py
digithree Jun 9, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ __pycache__/
venv
.eggs
.pytest_cache
*.egg-info
*.egg-info
auth.json
69 changes: 69 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,75 @@ On subsequent runs it will only fetch new items.

You can force it to fetch everything from the beginning again using `--all`. Use `--silent` to disable the progress bar.

## Exporting to Karakeep

You can export your Pocket bookmarks to [Karakeep](https://karakeep.com/) using the `export` command.

First, add your Karakeep credentials to the `auth.json` file created by the `auth` command:

```json
{
"pocket_consumer_key": "...",
"pocket_access_token": "...",
"karakeep_token": "your-karakeep-api-token",
"karakeep_base_url": "https://your-karakeep-instance.com"
}
```

Then export your bookmarks:

```bash
pocket-to-sqlite export pocket.db
```

### Export Options

**Filter by status:**
```bash
# Export only unread items
pocket-to-sqlite export pocket.db --filter-status 0

# Export only archived items
pocket-to-sqlite export pocket.db --filter-status 1

# Export only deleted items
pocket-to-sqlite export pocket.db --filter-status 2
```

**Filter by favorites:**
```bash
pocket-to-sqlite export pocket.db --filter-favorite
```

**Batching and resuming:**
```bash
# Export first 100 items
pocket-to-sqlite export pocket.db --limit 100

# Skip the first 500 items, then export the next 100
pocket-to-sqlite export pocket.db --offset 500 --limit 100
```

**Preview before exporting:**
```bash
# Dry-run to see what would be exported
pocket-to-sqlite export pocket.db --dry-run --limit 10
```

**Other options:**
```bash
# Use custom auth file
pocket-to-sqlite export pocket.db --auth /path/to/auth.json

# Suppress progress output
pocket-to-sqlite export pocket.db --silent

# Enable debug logging
pocket-to-sqlite export pocket.db --debug
```

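The export command includes retry logic for network timeouts and rate limiting, shows a progress bar, and counts per-item successes, skips, and errors. If any item fails to export, the command finishes the run and then exits with an error.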

## Using with Datasette

The SQLite database produced by this tool is designed to be browsed using [Datasette](https://datasette.readthedocs.io/). Use the [datasette-render-timestamps](https://github.com/simonw/datasette-render-timestamps) plugin to improve the display of the timestamp values.
167 changes: 154 additions & 13 deletions pocket_to_sqlite/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,26 +81,167 @@ def auth(auth):
)
@click.option("--all", is_flag=True, help="Fetch all items (not just new ones)")
@click.option("-s", "--silent", is_flag=True, help="Don't show progress bar")
def fetch(db_path, auth, all, silent):
@click.option("--debug", is_flag=True, help="Enable debug logging")
def fetch(db_path, auth, all, silent, debug):
"Save Pocket data to a SQLite database"
if debug:
import logging
logging.basicConfig(level=logging.DEBUG, format='%(levelname)s: %(message)s')
print("Debug logging enabled")

auth = json.load(open(auth))
db = sqlite_utils.Database(db_path)
last_since = None
if not all and db["since"].exists():
last_since = db["since"].get(1)["since"]
fetch = utils.FetchItems(
auth,
since=last_since,
record_since=lambda since: db["since"].insert(
{"id": 1, "since": since}, replace=True, pk="id"
),
)
if (all or last_since is None) and not silent:

# For incremental fetch, start from the number of items already in DB
start_offset = 0
if not all and "items" in db.table_names():
start_offset = db["items"].count()
if debug:
print(f"Found {start_offset} existing items, starting from offset {start_offset}")

fetch = utils.FetchItems(auth, start_offset=start_offset)
if (all or start_offset == 0) and not silent:
total_items = utils.fetch_stats(auth)["count_list"]
with click.progressbar(fetch, length=total_items, show_pos=True) as bar:
utils.save_items(bar, db)
else:
# No progress bar
print("Fetching items since {}".format(last_since))
print("Fetching items from offset {}".format(start_offset))
utils.save_items(fetch, db)
utils.ensure_fts(db)


@cli.command()
@click.argument("database")
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    default="auth.json",
    help="Path to auth tokens, defaults to auth.json",
)
@click.option("--limit", type=int, help="Maximum number of items to export")
@click.option("--offset", type=int, default=0, help="Number of items to skip")
@click.option(
    "--filter-status",
    type=click.Choice(['0', '1', '2']),
    help="Only export items with status (0=unread, 1=archived, 2=deleted)",
)
@click.option("--filter-favorite", is_flag=True, help="Only export favorited items")
@click.option("--dry-run", is_flag=True, help="Show what would be exported without making API calls")
@click.option("-s", "--silent", is_flag=True, help="Suppress progress output")
@click.option("--debug", is_flag=True, help="Enable debug logging")
def export(database, auth, limit, offset, filter_status, filter_favorite, dry_run, silent, debug):
    """Export bookmarks from SQLite database to Karakeep"""
    # NOTE: the docstring above is shown by click as the command's --help
    # text, so the longer description lives in comments instead.
    #
    # Flow:
    #   1. Load the auth JSON file and require a "karakeep_token" key.
    #   2. Open the database and require an "items" table.
    #   3. Count the rows matching the status/favorite filters (used for the
    #      summary line and to size the progress bar).
    #   4. Either preview the items (--dry-run) or export them, tallying
    #      "success" / "error" / "skipped" results.
    #
    # Raises click.ClickException when the auth file is missing or invalid,
    # the token is absent, the database cannot be opened, the "items" table
    # does not exist, or at least one item failed to export.
    if debug:
        import logging
        logging.basicConfig(level=logging.DEBUG, format='%(levelname)s: %(message)s')
        print("Debug logging enabled")

    # Load auth file; the context manager closes the handle even when the
    # JSON is malformed.
    try:
        with open(auth) as auth_file:
            auth_data = json.load(auth_file)
    except FileNotFoundError:
        raise click.ClickException(f"Auth file not found: {auth}")
    except json.JSONDecodeError:
        raise click.ClickException(f"Invalid JSON in auth file: {auth}")

    # Validate Karakeep credentials
    if "karakeep_token" not in auth_data:
        raise click.ClickException(f"Missing 'karakeep_token' in auth file: {auth}")

    # Open database
    try:
        db = sqlite_utils.Database(database)
    except Exception as e:
        raise click.ClickException(f"Could not open database: {e}")

    # Check if items table exists
    if "items" not in db.table_names():
        raise click.ClickException("No 'items' table found in database. Run 'fetch' command first.")

    # click.Choice hands the status back as a string; the query wants an int.
    filter_status_int = int(filter_status) if filter_status is not None else None

    # Build the WHERE clause for the count query. Only fixed SQL fragments
    # are interpolated; the status value is passed as a bound parameter.
    count_conditions = []
    count_params = []
    if filter_status_int is not None:
        count_conditions.append("status = ?")
        count_params.append(filter_status_int)
    if filter_favorite:
        count_conditions.append("favorite = 1")

    count_where = " WHERE " + " AND ".join(count_conditions) if count_conditions else ""
    # Index 0 works for both plain tuples and sqlite3.Row results.
    total_items = db.execute(
        f"SELECT COUNT(*) as count FROM items{count_where}", count_params
    ).fetchone()[0]

    if not silent:
        print(f"Found {total_items} items to export")
        if filter_status_int is not None:
            status_names = {0: "unread", 1: "archived", 2: "deleted"}
            print(f"Filtering by status: {status_names.get(filter_status_int, filter_status_int)}")
        if filter_favorite:
            print("Filtering by favorites only")
        if limit:
            print(f"Limiting to {limit} items")
        if offset:
            print(f"Starting from offset {offset}")
        if dry_run:
            print("DRY RUN - No actual API calls will be made")

    if dry_run:
        # Show what would be exported
        for result in utils.preview_export_items(
            db, limit=limit, offset=offset,
            filter_status=filter_status_int, filter_favorite=filter_favorite
        ):
            if result["status"] == "skipped":
                print(f"[SKIP] Item {result['item_id']}: {result['reason']}")
            else:
                # `or` also covers a title that is present but None/empty,
                # which would otherwise break the slice below.
                title = result.get('title') or 'No title'
                print(f"[EXPORT] Item {result['item_id']}: {title[:60]}...")
                print(f"  URL: {result.get('url', 'No URL')}")
        return

    # Actual export
    counts = {"success": 0, "error": 0, "skipped": 0}

    def record(result):
        """Tally one export result; echo error details in non-silent debug mode."""
        status = result["status"]
        if status in counts:
            counts[status] += 1
        if status == "error" and debug and not silent:
            print(f"\nError exporting item {result['item_id']}: {result['error']}")

    export_iter = utils.export_items_to_karakeep(
        db, auth_data, limit=limit, offset=offset,
        filter_status=filter_status_int, filter_favorite=filter_favorite
    )

    if silent:
        # No progress bar
        for result in export_iter:
            record(result)
    else:
        # Clamp at zero: an offset past the last matching row would
        # otherwise give the progress bar a negative length.
        remaining = max(0, total_items - offset)
        progress_length = min(limit, remaining) if limit else remaining

        with click.progressbar(export_iter, length=progress_length, show_pos=True,
                               label="Exporting") as bar:
            for result in bar:
                record(result)

    success_count = counts["success"]
    error_count = counts["error"]
    skip_count = counts["skipped"]

    if not silent:
        print("\nExport completed:")
        print(f"  Successfully exported: {success_count}")
        if skip_count > 0:
            print(f"  Skipped: {skip_count}")
        if error_count > 0:
            print(f"  Errors: {error_count}")

    # Surface failures to the shell via click's error exit path.
    if error_count > 0:
        raise click.ClickException(f"Export completed with {error_count} errors")
Loading