5 changes: 4 additions & 1 deletion .env.example
@@ -61,6 +61,9 @@ RESUME_PARTIAL=true
# Enable detailed logging for troubleshooting
DEBUG=false

# Download subtitles/captions when available (default: true)
SUBTITLE_DOWNLOAD_ENABLED=true

# ===============================================
# ADVANCED SETTINGS
# ===============================================
@@ -83,4 +86,4 @@ COURSE_DATA_FILE=""
# ALL_VIDEO_FORMATS=false

# Log level (DEBUG, INFO, WARNING, ERROR)
# LOG_LEVEL="INFO"
# LOG_LEVEL="INFO"
3 changes: 3 additions & 0 deletions .gitignore
@@ -85,6 +85,9 @@ downloads/
*.mkv
ffmpeg.log

# Allow HTML templates used by the offline site generator
!thinkific_downloader/templates/*.html

# But allow certain JSON files
!package.json
!requirements.json
3 changes: 3 additions & 0 deletions README.md
@@ -52,6 +52,7 @@ A modern, feature-rich Python utility to download courses from Thinkific platfor
| 📄 **HTML Content** | ✅ Full | `downloader.py` | Clean extraction, formatting |
| 📚 **PDF Documents** | ✅ Full | `downloader.py` | Direct download, validation |
| 🎵 **Audio Files** | ✅ Full | `downloader.py` | MP3, M4A support |
| 📝 **Subtitles (Wistia)** | ✅ Full | `wistia_downloader.py` | Multi-language caption downloads |
| 🎯 **Quizzes** | ✅ Basic | `downloader.py` | Structure extraction |
| 🎨 **Presentations** | ✅ Full | FFmpeg merge | Multi-slide processing |

@@ -70,6 +71,7 @@ A modern, feature-rich Python utility to download courses from Thinkific platfor
- **Resume Support** - Skip existing files, continue interrupted downloads
- **Atomic Resume/Backup** - Status file is always safely backed up and updated, works on Windows, Mac, Linux
- **Multiple Quality Options** - Choose video quality (720p, 1080p, etc.)
- **Subtitle Downloads** - Automatically grab Wistia caption tracks in multiple languages
- **Comprehensive Logging** - Debug mode for troubleshooting

### 🛡️ **Safety & Compliance**
@@ -201,6 +203,7 @@ RATE_LIMIT_MB_S= # Rate limit in MB/s (empty = unlimited)
VALIDATE_DOWNLOADS=true # Enable file integrity validation
RESUME_PARTIAL=true # Enable resume for partial downloads
DEBUG=false # Enable debug logging
SUBTITLE_DOWNLOAD_ENABLED=true # Download subtitles/captions when available

# ===============================================
# ADVANCED SETTINGS
119 changes: 116 additions & 3 deletions thinkific_downloader/__main__.py
@@ -1,10 +1,123 @@
#!/usr/bin/env python3
"""
Command line entry point for Thinkific Downloader
Command line entry point for Thinkific Downloader and offline site generator.

Usage examples:
python -m thinkific_downloader <course_url>
python -m thinkific_downloader --json beginner-course.json
python -m thinkific_downloader generate-site beginner-course.json --clean
"""

from __future__ import annotations

import argparse
import sys
from thinkific_downloader.downloader import main
from pathlib import Path
from typing import List, Optional

from thinkific_downloader.downloader import main as downloader_main
from thinkific_downloader.site_generator import (
SiteGenerationError,
generate_site,
load_course,
)

# Note: keep console output lightweight so it mirrors existing downloader UX.


def _run_generate_site(argv: List[str]) -> int:
parser = argparse.ArgumentParser(
prog="thinkific_downloader generate-site",
description="Validate downloaded Thinkific course assets and build an offline viewer.",
)
parser.add_argument(
"metadata",
help="Path to the course metadata JSON file (e.g., beginner-chess-mastery.json).",
)
parser.add_argument(
"--downloads-dir",
dest="downloads_dir",
help="Override the downloads root directory (defaults to <metadata>/../downloads).",
)
parser.add_argument(
"--output-dir",
dest="output_dir",
help="Directory to write the generated site (defaults to downloads/<course-slug>/).",
)
parser.add_argument(
"--assets-dirname",
dest="assets_dirname",
default="site-assets",
help="Subdirectory name for bundled CSS/JS assets (default: site-assets).",
)
parser.add_argument(
"--clean",
action="store_true",
help="Remove previously generated site files before rendering.",
)
parser.add_argument(
"--dry-run",
action="store_true",
help="Validate metadata and assets without writing any files.",
)
parser.add_argument(
"-q",
"--quiet",
action="store_true",
help="Suppress success output; errors will still be printed.",
)

args = parser.parse_args(argv)

metadata_path = Path(args.metadata).expanduser()
downloads_dir: Optional[Path] = None
output_dir: Optional[Path] = None

if args.downloads_dir:
downloads_dir = Path(args.downloads_dir).expanduser()
if args.output_dir:
output_dir = Path(args.output_dir).expanduser()

try:
if args.dry_run:
load_course(metadata_path, downloads_root=downloads_dir)
if not args.quiet:
print("✅ Course assets validated (dry run).")
return 0

generated_index = generate_site(
metadata_path,
downloads_root=downloads_dir,
output_dir=output_dir,
clean=args.clean,
assets_dirname=args.assets_dirname,
)
if not args.quiet:
print(f"✅ Offline course generated: {generated_index}")
return 0

except SiteGenerationError as exc:
print("✖ Site generation failed:")
for error in exc.errors:
print(f" - {error}")
return 1
except FileNotFoundError as exc:
print(f"✖ {exc}")
return 1
except Exception as exc: # pragma: no cover - unexpected edge cases
print(f"✖ Unexpected error: {exc}")
return 1


def main(argv: Optional[List[str]] = None) -> None:
argv = argv or sys.argv
if len(argv) > 1 and argv[1] in {"generate-site", "generate_site"}:
exit_code = _run_generate_site(argv[2:])
sys.exit(exit_code)

# Fall back to the legacy downloader behaviour.
downloader_main(argv)


if __name__ == "__main__":
main(sys.argv)
main(sys.argv)
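
A minimal programmatic sketch of the same flow the new `generate-site` subcommand wires up, assuming the `thinkific_downloader.site_generator` API matches the imports above (`generate_site`, `SiteGenerationError`); the metadata path is a placeholder:

```python
from pathlib import Path

from thinkific_downloader.site_generator import SiteGenerationError, generate_site

# Placeholder path; point this at your downloaded course metadata JSON.
metadata = Path("beginner-course.json").expanduser()

try:
    # Roughly equivalent to: python -m thinkific_downloader generate-site beginner-course.json --clean
    index_path = generate_site(
        metadata,
        downloads_root=None,      # default: <metadata>/../downloads
        output_dir=None,          # default: downloads/<course-slug>/
        clean=True,
        assets_dirname="site-assets",
    )
    print(f"Offline course generated: {index_path}")
except SiteGenerationError as exc:
    for error in exc.errors:
        print(f" - {error}")
```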
5 changes: 4 additions & 1 deletion thinkific_downloader/config.py
@@ -37,6 +37,7 @@ class Settings:
resume_partial: bool = True
debug: bool = False
course_name: str = "Course"
subtitle_download_enabled: bool = True

@classmethod
def from_env(cls):
@@ -67,6 +68,7 @@ def from_env(cls):
validate_downloads = os.getenv('VALIDATE_DOWNLOADS', 'true').lower() in ('1', 'true', 'yes', 'on')
resume_partial = os.getenv('RESUME_PARTIAL', 'true').lower() in ('1', 'true', 'yes', 'on')
debug = os.getenv('DEBUG', 'false').lower() in ('1', 'true', 'yes', 'on')
subtitle_download_enabled = os.getenv('SUBTITLE_DOWNLOAD_ENABLED', 'true').lower() in ('1', 'true', 'yes', 'on')

# Clean cookie data to remove Unicode characters that cause encoding issues
if cookie_data:
@@ -101,5 +103,6 @@ def from_env(cls):
download_delay=download_delay,
validate_downloads=validate_downloads,
resume_partial=resume_partial,
debug=debug
debug=debug,
subtitle_download_enabled=subtitle_download_enabled
)
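
The new flag follows the same truthy-string convention as the other boolean settings parsed in `from_env`. A standalone sketch of how that expression evaluates, independent of the rest of `Settings`:

```python
import os

# Same parsing rule as Settings.from_env(): only "1", "true", "yes", "on" (case-insensitive) enable it.
for raw in ("1", "true", "YES", "on", "false", "0", ""):
    os.environ["SUBTITLE_DOWNLOAD_ENABLED"] = raw
    enabled = os.getenv("SUBTITLE_DOWNLOAD_ENABLED", "true").lower() in ("1", "true", "yes", "on")
    print(f"SUBTITLE_DOWNLOAD_ENABLED={raw!r} -> {enabled}")
```

Note the asymmetry: an unset variable falls back to the `'true'` default, while an explicitly empty string parses as disabled.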
100 changes: 83 additions & 17 deletions thinkific_downloader/downloader.py
@@ -382,10 +382,74 @@ def download_file_chunked(src_url: str, dst_name: str, chunk_mb: int = 1):
add_download_task(src_url, dst_path, "file")


def _load_cached_progress(cache_file: Path):
"""Return previously analyzed chapters and queued tasks from the resume cache."""
analyzed_chapters = set()
saved_tasks: List[Dict[str, Any]] = []

if not cache_file.exists():
return analyzed_chapters, saved_tasks

try:
with open(cache_file, 'r', encoding='utf-8') as f:
cache_data = json.load(f)

analyzed_chapters = set(cache_data.get('analyzed_chapters', []))
saved_tasks = cache_data.get('download_tasks', [])
print(f"📋 Found previous progress: {len(analyzed_chapters)} chapters analyzed, {len(saved_tasks)} tasks cached")

# If subtitle downloads were newly enabled, invalidate cache so we can regenerate tasks.
if SETTINGS and SETTINGS.subtitle_download_enabled and saved_tasks:
has_subtitle_tasks = any(
(task.get('content_type') or '').lower() == 'subtitle'
for task in saved_tasks
)
if not has_subtitle_tasks:
print("🆕 Subtitle support enabled — refreshing cached analysis to include captions.")
analyzed_chapters = set()
saved_tasks = []
try:
cache_file.unlink()
except OSError as exc:
print(f" ⚠️ Warning: Failed to delete cache file for refresh: {exc}")
except (json.JSONDecodeError, OSError):
analyzed_chapters = set()
saved_tasks = []

return analyzed_chapters, saved_tasks


def _restore_saved_tasks(saved_tasks: List[Dict[str, Any]]):
"""Restore cached download tasks, respecting the subtitle feature flag."""
if not saved_tasks:
return

restored_tasks = list(saved_tasks)
if SETTINGS and not SETTINGS.subtitle_download_enabled:
total_tasks = len(restored_tasks)
restored_tasks = [
task for task in restored_tasks
if (task.get('content_type') or 'video').lower() != 'subtitle'
]
skipped_count = total_tasks - len(restored_tasks)
if skipped_count > 0:
print(f"⏭️ Skipping {skipped_count} cached subtitle task(s) because subtitle downloads are disabled.")

if not restored_tasks:
return

print(f"📥 Restoring {len(restored_tasks)} previously collected download tasks...")
for task_data in restored_tasks:
add_download_task(task_data['url'], Path(task_data['dest_path']), task_data.get('content_type', 'video'))



def init_course(data: Dict[str, Any]):
"""Initialize course structure and collect ALL download tasks first."""
global COURSE_CONTENTS, ROOT_PROJECT_DIR, BASE_HOST, DOWNLOAD_TASKS

# Ensure settings/download manager are initialized so feature flags are available
init_settings()

# Initialize download tasks list
DOWNLOAD_TASKS = []
@@ -409,17 +473,7 @@ def init_course(data: Dict[str, Any]):
analyzed_chapters = set()
saved_tasks = []

if cache_file.exists():
try:
import json
with open(cache_file, 'r', encoding='utf-8') as f:
cache_data = json.load(f)
analyzed_chapters = set(cache_data.get('analyzed_chapters', []))
saved_tasks = cache_data.get('download_tasks', [])
print(f"📋 Found previous progress: {len(analyzed_chapters)} chapters analyzed, {len(saved_tasks)} tasks cached")
except:
analyzed_chapters = set()
saved_tasks = []
analyzed_chapters, saved_tasks = _load_cached_progress(cache_file)

# Derive base host from landing_page_url if available
landing = data['course'].get('landing_page_url')
@@ -430,10 +484,7 @@ def init_course(data: Dict[str, Any]):
print("\n🔍 Phase 1: Analyzing course content and collecting download links...")

# Restore saved download tasks
if saved_tasks:
print(f"📥 Restoring {len(saved_tasks)} previously collected download tasks...")
for task_data in saved_tasks:
add_download_task(task_data['url'], Path(task_data['dest_path']), task_data.get('content_type', 'video'))
_restore_saved_tasks(saved_tasks)

collect_all_download_tasks(data, analyzed_chapters, cache_file)

@@ -835,9 +886,24 @@ def collect_video_task_wistia(wistia_id: str, file_name: str, dest_dir: Path):
video_url = selected.get('url')
if video_url:
ext = '.mp4' # Default extension
resolved_name = filter_filename(file_name) + ext
resolved_name = filter_filename(file_name)
if not resolved_name.lower().endswith(ext):
resolved_name += ext
print(f" 📹 Found video: {resolved_name}")
add_download_task(video_url, dest_dir / resolved_name, "video")
try:
from .wistia_downloader import build_wistia_subtitle_tasks
subtitle_tasks = build_wistia_subtitle_tasks(
data.get('media') or {},
dest_dir,
resolved_name,
SETTINGS,
)
for task in subtitle_tasks:
print(f" [Subs] Queued subtitles: {Path(task['dest_path']).name}")
add_download_task(task['url'], Path(task['dest_path']), task.get('content_type', 'subtitle'))
except Exception as subtitle_error:
print(f" ⚠️ Unable to queue subtitles for {resolved_name}: {subtitle_error}")
except Exception as e:
print(f" ❌ Failed to collect Wistia video {wistia_id}: {e}")

@@ -1282,4 +1348,4 @@ def main(argv: List[str]):


if __name__ == '__main__':
main(sys.argv)
main(sys.argv)
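
For reference, `_load_cached_progress` reads `analyzed_chapters` and `download_tasks` from the resume cache, and both `_restore_saved_tasks` and the new Wistia subtitle loop consume task dicts keyed by `url`, `dest_path`, and `content_type`. A hypothetical cache payload inferred from those reads (values are illustrative, not from a real run):

```python
# Hypothetical resume-cache contents; keys mirror what _load_cached_progress() reads
# and what add_download_task() is fed when cached tasks are restored.
cache_data = {
    "analyzed_chapters": ["chapter-101", "chapter-102"],  # illustrative identifiers
    "download_tasks": [
        {
            "url": "https://example.com/lesson.mp4",
            "dest_path": "downloads/course/01-intro/lesson.mp4",
            "content_type": "video",
        },
        {
            "url": "https://example.com/lesson.en.vtt",
            "dest_path": "downloads/course/01-intro/lesson.en.vtt",
            # The presence of a task of this type is what keeps the cache from being
            # invalidated when SUBTITLE_DOWNLOAD_ENABLED is on.
            "content_type": "subtitle",
        },
    ],
}
```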