Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ RESUME_PARTIAL=true
# Enable detailed logging for troubleshooting
DEBUG=false

# Download subtitles/captions when available (default: true)
SUBTITLE_DOWNLOAD_ENABLED=true

# ===============================================
# ADVANCED SETTINGS
# ===============================================
Expand All @@ -83,4 +86,4 @@ COURSE_DATA_FILE=""
# ALL_VIDEO_FORMATS=false

# Log level (DEBUG, INFO, WARNING, ERROR)
# LOG_LEVEL="INFO"
# LOG_LEVEL="INFO"
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ A modern, feature-rich Python utility to download courses from Thinkific platfor
| 📄 **HTML Content** | ✅ Full | `downloader.py` | Clean extraction, formatting |
| 📚 **PDF Documents** | ✅ Full | `downloader.py` | Direct download, validation |
| 🎵 **Audio Files** | ✅ Full | `downloader.py` | MP3, M4A support |
| 📝 **Subtitles (Wistia)** | ✅ Full | `wistia_downloader.py` | Multi-language caption downloads |
| 🎯 **Quizzes** | ✅ Basic | `downloader.py` | Structure extraction |
| 🎨 **Presentations** | ✅ Full | FFmpeg merge | Multi-slide processing |

Expand All @@ -70,6 +71,7 @@ A modern, feature-rich Python utility to download courses from Thinkific platfor
- **Resume Support** - Skip existing files, continue interrupted downloads
- **Atomic Resume/Backup** - Status file is always safely backed up and updated, works on Windows, Mac, Linux
- **Multiple Quality Options** - Choose video quality (720p, 1080p, etc.)
- **Subtitle Downloads** - Automatically grab Wistia caption tracks in multiple languages
- **Comprehensive Logging** - Debug mode for troubleshooting

### 🛡️ **Safety & Compliance**
Expand Down Expand Up @@ -201,6 +203,7 @@ RATE_LIMIT_MB_S= # Rate limit in MB/s (empty = unlimited)
VALIDATE_DOWNLOADS=true # Enable file integrity validation
RESUME_PARTIAL=true # Enable resume for partial downloads
DEBUG=false # Enable debug logging
SUBTITLE_DOWNLOAD_ENABLED=true # Download subtitles/captions when available

# ===============================================
# ADVANCED SETTINGS
Expand Down
5 changes: 4 additions & 1 deletion thinkific_downloader/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class Settings:
resume_partial: bool = True
debug: bool = False
course_name: str = "Course"
subtitle_download_enabled: bool = True

@classmethod
def from_env(cls):
Expand Down Expand Up @@ -67,6 +68,7 @@ def from_env(cls):
validate_downloads = os.getenv('VALIDATE_DOWNLOADS', 'true').lower() in ('1', 'true', 'yes', 'on')
resume_partial = os.getenv('RESUME_PARTIAL', 'true').lower() in ('1', 'true', 'yes', 'on')
debug = os.getenv('DEBUG', 'false').lower() in ('1', 'true', 'yes', 'on')
subtitle_download_enabled = os.getenv('SUBTITLE_DOWNLOAD_ENABLED', 'true').lower() in ('1', 'true', 'yes', 'on')

# Clean cookie data to remove Unicode characters that cause encoding issues
if cookie_data:
Expand Down Expand Up @@ -101,5 +103,6 @@ def from_env(cls):
download_delay=download_delay,
validate_downloads=validate_downloads,
resume_partial=resume_partial,
debug=debug
debug=debug,
subtitle_download_enabled=subtitle_download_enabled
)
100 changes: 83 additions & 17 deletions thinkific_downloader/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,10 +382,74 @@ def download_file_chunked(src_url: str, dst_name: str, chunk_mb: int = 1):
add_download_task(src_url, dst_path, "file")


def _load_cached_progress(cache_file: Path):
    """Load analyzed chapters and queued download tasks from the resume cache.

    A cache that is missing, unreadable, or not shaped as expected is treated
    as "no previous progress" rather than raising, so a damaged cache file can
    never abort course initialisation.

    Args:
        cache_file: Path to the JSON resume cache. The file is expected to
            hold an object with an ``analyzed_chapters`` list and a
            ``download_tasks`` list of task objects.

    Returns:
        An ``(analyzed_chapters, saved_tasks)`` tuple: a set of chapter
        identifiers and a list of task dicts. Both are empty when there is no
        usable cache or when the cache was discarded for a subtitle refresh.
    """
    import json  # function-scope import keeps this helper self-contained

    if not cache_file.exists():
        return set(), []

    try:
        with open(cache_file, 'r', encoding='utf-8') as f:
            cache_data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError as well as UnicodeDecodeError
        # raised for a cache file that is not valid UTF-8.
        return set(), []

    # Valid JSON is not necessarily a valid cache: require the expected shape.
    if not isinstance(cache_data, dict):
        return set(), []

    raw_chapters = cache_data.get('analyzed_chapters') or []
    raw_tasks = cache_data.get('download_tasks') or []
    if not isinstance(raw_chapters, list) or not isinstance(raw_tasks, list):
        return set(), []

    try:
        analyzed_chapters = set(raw_chapters)
    except TypeError:
        # Unhashable entries (nested lists/objects) mean the cache is corrupt.
        return set(), []

    # Non-dict entries cannot describe a task; drop them so later .get() calls are safe.
    saved_tasks: List[Dict[str, Any]] = [task for task in raw_tasks if isinstance(task, dict)]
    print(f"📋 Found previous progress: {len(analyzed_chapters)} chapters analyzed, {len(saved_tasks)} tasks cached")

    # If subtitle downloads were newly enabled, invalidate the cache so tasks
    # are regenerated with captions included.
    # NOTE(review): a cache whose regenerated tasks still contain no subtitle
    # entries (e.g. a course without captions) will be discarded again on every
    # run while the flag is on - consider persisting a marker in the cache.
    if saved_tasks and SETTINGS and SETTINGS.subtitle_download_enabled:
        has_subtitle_tasks = any(
            str(task.get('content_type') or '').lower() == 'subtitle'
            for task in saved_tasks
        )
        if not has_subtitle_tasks:
            print("🆕 Subtitle support enabled — refreshing cached analysis to include captions.")
            analyzed_chapters = set()
            saved_tasks = []
            try:
                cache_file.unlink()
            except OSError as exc:
                # Best effort: a stale file is harmless because its content is ignored this run.
                print(f" ⚠️ Warning: Failed to delete cache file for refresh: {exc}")

    return analyzed_chapters, saved_tasks


def _restore_saved_tasks(saved_tasks: List[Dict[str, Any]]):
    """Re-queue download tasks recovered from the resume cache.

    Entries that are not dicts, or that lack a ``url`` or ``dest_path``, are
    skipped with a warning instead of aborting the restore. Subtitle tasks are
    dropped when the subtitle feature flag is disabled.

    Args:
        saved_tasks: Task dicts read from the cache. Each usable entry carries
            ``url`` and ``dest_path`` and optionally ``content_type``; a
            missing or empty ``content_type`` is treated as ``'video'``.

    Returns:
        None. Usable tasks are handed to ``add_download_task``.
    """
    if not saved_tasks:
        return

    # Cache content is not trusted: keep only entries with the keys used below.
    usable_tasks = [
        task for task in saved_tasks
        if isinstance(task, dict) and task.get('url') and task.get('dest_path')
    ]
    malformed_count = len(saved_tasks) - len(usable_tasks)
    if malformed_count > 0:
        print(f"⚠️ Ignoring {malformed_count} malformed cached task(s).")
    if not usable_tasks:
        return

    restored_tasks = usable_tasks
    if SETTINGS and not SETTINGS.subtitle_download_enabled:
        total_tasks = len(restored_tasks)
        restored_tasks = [
            task for task in restored_tasks
            if str(task.get('content_type') or 'video').lower() != 'subtitle'
        ]
        skipped_count = total_tasks - len(restored_tasks)
        if skipped_count > 0:
            print(f"⏭️ Skipping {skipped_count} cached subtitle task(s) because subtitle downloads are disabled.")

    if not restored_tasks:
        return

    print(f"📥 Restoring {len(restored_tasks)} previously collected download tasks...")
    for task_data in restored_tasks:
        # Same fallback as the subtitle filter above, so a null content_type is never passed on.
        content_type = task_data.get('content_type') or 'video'
        add_download_task(task_data['url'], Path(task_data['dest_path']), content_type)



def init_course(data: Dict[str, Any]):
"""Initialize course structure and collect ALL download tasks first."""
global COURSE_CONTENTS, ROOT_PROJECT_DIR, BASE_HOST, DOWNLOAD_TASKS

# Ensure settings/download manager are initialized so feature flags are available
init_settings()

# Initialize download tasks list
DOWNLOAD_TASKS = []
Expand All @@ -409,17 +473,7 @@ def init_course(data: Dict[str, Any]):
analyzed_chapters = set()
saved_tasks = []

if cache_file.exists():
try:
import json
with open(cache_file, 'r', encoding='utf-8') as f:
cache_data = json.load(f)
analyzed_chapters = set(cache_data.get('analyzed_chapters', []))
saved_tasks = cache_data.get('download_tasks', [])
print(f"📋 Found previous progress: {len(analyzed_chapters)} chapters analyzed, {len(saved_tasks)} tasks cached")
except:
analyzed_chapters = set()
saved_tasks = []
analyzed_chapters, saved_tasks = _load_cached_progress(cache_file)

# Derive base host from landing_page_url if available
landing = data['course'].get('landing_page_url')
Expand All @@ -430,10 +484,7 @@ def init_course(data: Dict[str, Any]):
print("\n🔍 Phase 1: Analyzing course content and collecting download links...")

# Restore saved download tasks
if saved_tasks:
print(f"📥 Restoring {len(saved_tasks)} previously collected download tasks...")
for task_data in saved_tasks:
add_download_task(task_data['url'], Path(task_data['dest_path']), task_data.get('content_type', 'video'))
_restore_saved_tasks(saved_tasks)

collect_all_download_tasks(data, analyzed_chapters, cache_file)

Expand Down Expand Up @@ -835,9 +886,24 @@ def collect_video_task_wistia(wistia_id: str, file_name: str, dest_dir: Path):
video_url = selected.get('url')
if video_url:
ext = '.mp4' # Default extension
resolved_name = filter_filename(file_name) + ext
resolved_name = filter_filename(file_name)
if not resolved_name.lower().endswith(ext):
resolved_name += ext
print(f" 📹 Found video: {resolved_name}")
add_download_task(video_url, dest_dir / resolved_name, "video")
try:
from .wistia_downloader import build_wistia_subtitle_tasks
subtitle_tasks = build_wistia_subtitle_tasks(
data.get('media') or {},
dest_dir,
resolved_name,
SETTINGS,
)
for task in subtitle_tasks:
print(f" [Subs] Queued subtitles: {Path(task['dest_path']).name}")
add_download_task(task['url'], Path(task['dest_path']), task.get('content_type', 'subtitle'))
except Exception as subtitle_error:
print(f" ⚠️ Unable to queue subtitles for {resolved_name}: {subtitle_error}")
except Exception as e:
print(f" ❌ Failed to collect Wistia video {wistia_id}: {e}")

Expand Down Expand Up @@ -1282,4 +1348,4 @@ def main(argv: List[str]):


if __name__ == '__main__':
main(sys.argv)
main(sys.argv)
Loading