
Commit 7f059aa

Committed Sep 15, 2024
Reorganize things
1 parent 8a19448 commit 7f059aa

9 files changed: +238 −231 lines
 

README.md

+107 −58
Large diffs are not rendered by default.

src/kodekloud_downloader/cli.py

+7 −3

@@ -7,8 +7,11 @@

 from kodekloud_downloader.enums import Quality
 from kodekloud_downloader.helpers import select_courses
-from kodekloud_downloader.main import download_course, download_quiz
-from kodekloud_downloader.models2 import get_all_course
+from kodekloud_downloader.main import (
+    download_course,
+    download_quiz,
+    parse_course_from_url,
+)
 from kodekloud_downloader.models.helper import collect_all_courses


@@ -71,8 +74,9 @@ def dl(
             max_duplicate_count=max_duplicate_count,
         )
     elif validators.url(course_url):
+        course_detail = parse_course_from_url(course_url)
         download_course(
-            url=course_url,
+            course=course_detail,
             cookie=cookie,
             quality=quality,
             output_dir=output_dir,

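With this change the CLI resolves a course URL to a CourseDetail before calling download_course. A minimal sketch of the new call flow, using only the signatures visible in this diff (the URL, cookie path, and option values below are illustrative, not taken from the repository):

from kodekloud_downloader.main import download_course, parse_course_from_url

# Hypothetical values; the real CLI collects these from its command-line options.
course_detail = parse_course_from_url("https://learn.kodekloud.com/courses/example-course")
download_course(
    course=course_detail,
    cookie="cookies.txt",   # Netscape-format cookie export
    quality="720p",
    output_dir="kodekloud",
    max_duplicate_count=3,
)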
src/kodekloud_downloader/helpers.py

+26 −20

@@ -1,7 +1,8 @@
 import logging
+import re
 import string
 from pathlib import Path
-from typing import List
+from typing import List, Optional

 import prettytable
 import requests
@@ -102,7 +103,7 @@ def download_video(url: str, output_path: Path, cookie: str, quality: str) -> None:
     :param quality: The video quality (e.g. "720p")
     """
     headers = {
-        'Referer': 'https://learn.kodekloud.com/',
+        "Referer": "https://learn.kodekloud.com/",
     }
     ydl_opts = {
         "format": f"bestvideo[height<={quality[:-1]}]+bestaudio/best[height<={quality[:-1]}]/best",
@@ -113,7 +114,7 @@ def download_video(url: str, output_path: Path, cookie: str, quality: str) -> None:
         "merge_output_format": "mkv",
         "writesubtitles": True,
         "no_write_sub": True,
-        'http_headers': headers,
+        "http_headers": headers,
     }
     logger.debug(f"Calling download with following options: {ydl_opts}")
     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
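For reference, download_video strips the trailing "p" from the quality string when it builds the yt-dlp format selector shown above, so a quality of "720p" produces:

quality = "720p"
fmt = f"bestvideo[height<={quality[:-1]}]+bestaudio/best[height<={quality[:-1]}]/best"
print(fmt)  # bestvideo[height<=720]+bestaudio/best[height<=720]/best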
@@ -149,21 +150,26 @@ def download_all_pdf(content, download_path: Path, cookie: str) -> None:
         file_name.write_bytes(response.content)


-def get_video_info(url: str, cookie: str):
-    headers = {
-        'Referer': 'https://learn.kodekloud.com/',
-    }
-    ydl_opts = {
-        "skip_download": True,
-        "print_json": True,
-        "quiet": True,
-        "extract_flat": True,
-        "simulate": True,
-        "no_warnings": True,
-        "cookiefile": cookie,
-        'http_headers': headers,
-    }
+def parse_token(cookiefile: str) -> Optional[str]:
+    """
+    Parse the session cookie from a file containing cookies.

-    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-        info = ydl.extract_info(url, download=False)
-        return info
+    :param cookiefile: The path to the file containing cookies.
+    :return: The value of the 'session-cookie' if found, otherwise None.
+    :raises FileNotFoundError: If the cookie file does not exist.
+    :raises IOError: If there is an error reading the file.
+    """
+    cookies = {}
+    try:
+        with open(cookiefile, "r") as fp:
+            for line in fp:
+                if line.strip() and not re.match(r"^\#", line):
+                    line_fields = line.strip().split("\t")
+                    if len(line_fields) > 6:
+                        cookies[line_fields[5]] = line_fields[6]
+    except FileNotFoundError:
+        raise FileNotFoundError(f"The file {cookiefile} does not exist.")
+    except IOError as e:
+        raise IOError(f"Error reading the file {cookiefile}: {e}")
+
+    return cookies.get("session-cookie")

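parse_token replaces the old parseToken in main.py and reads a Netscape-format cookies.txt export, where each non-comment line carries seven tab-separated fields and the cookie name and value sit at indexes 5 and 6. A small sketch under those assumptions (the file path and token value are made up):

from kodekloud_downloader.helpers import parse_token

# One Netscape cookie line: domain, subdomain flag, path, secure, expiry, name, value.
sample = "learn.kodekloud.com\tTRUE\t/\tTRUE\t1735689600\tsession-cookie\tabc123\n"
with open("cookies.txt", "w") as fp:  # illustrative path
    fp.write(sample)

print(parse_token("cookies.txt"))  # -> abc123, or None if no session-cookie entry exists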
src/kodekloud_downloader/main.py

+43 −25

@@ -1,8 +1,6 @@
 import logging
 from collections import defaultdict
-from http.cookiejar import MozillaCookieJar
 from pathlib import Path
-import re
 from typing import Union

 import markdownify
@@ -13,18 +11,31 @@
 from kodekloud_downloader.helpers import (
     download_all_pdf,
     download_video,
-    get_video_info,
     is_normal_content,
     normalize_name,
+    parse_token,
 )
+from kodekloud_downloader.models.course import CourseDetail
 from kodekloud_downloader.models.courses import Course
 from kodekloud_downloader.models.helper import fetch_course_detail
-from kodekloud_downloader.models2 import Quiz, Topic
+from kodekloud_downloader.models.quiz import Quiz

 logger = logging.getLogger(__name__)


-def download_quiz(output_dir: str, sep: bool):
+def download_quiz(output_dir: str, sep: bool) -> None:
+    """
+    Download quizzes from the API and save them as Markdown files.
+
+    :param output_dir: The directory path where the Markdown files will be saved.
+    :param sep: A boolean flag indicating whether to separate each quiz into individual files.
+        If `True`, each quiz will be saved as a separate Markdown file. If `False`,
+        all quizzes will be combined into a single Markdown file.
+    :return: None
+    :raises ValueError: If `output_dir` is not a valid directory path.
+    :raises requests.RequestException: For errors related to the HTTP request.
+    :raises IOError: For file I/O errors.
+    """
     quiz_markdown = [] if sep else ["# KodeKloud Quiz"]
     response = requests.get("https://mcq-backend-main.kodekloud.com/api/quizzes/all")
     response.raise_for_status()
@@ -75,21 +86,21 @@ def download_quiz(output_dir: str, sep: bool):
     print(f"Quiz file written in {output_file}")


-def parseToken(cookiefile):
-    """Parse a cookies.txt file and return a dictionary of key value pairs
-    compatible with requests."""
+def parse_course_from_url(url: str) -> CourseDetail:
+    """
+    Parse the course slug from the given URL and fetch the course details.

-    cookies = {}
-    with open(cookiefile, "r") as fp:
-        for line in fp:
-            if line.strip() and not re.match(r"^\#", line):
-                lineFields = line.strip().split("\t")
-                cookies[lineFields[5]] = lineFields[6]
-    return cookies.get("session-cookie")
+    :param url: The URL from which to extract the course slug.
+    :return: An instance of `CourseDetail` containing the course details.
+    :raises ValueError: If the URL does not contain a valid course slug.
+    """
+    url = url.strip("/")
+    course_slug = url.split("/")[-1]
+    return fetch_course_detail(course_slug)


 def download_course(
-    course: Course,
+    course: Union[Course, CourseDetail],
     cookie: str,
     quality: str,
     output_dir: Union[str, Path],
@@ -98,22 +109,22 @@ def download_course(
     """
     Download a course from KodeKloud.

-    :param url: The course URL
+    :param course: The Course or CourseDetail object
     :param cookie: The user's authentication cookie
     :param quality: The video quality (e.g. "720p")
     :param output_dir: The output directory for the downloaded course
    :param max_duplicate_count: Maximum duplicate video before after cookie expire message will be raised
     """
     session = requests.Session()
-    cj = MozillaCookieJar(cookie)
-    cj.load(ignore_discard=True, ignore_expires=True)
-    session_token = parseToken(cookie)
+    session_token = parse_token(cookie)
     headers = {"authorization": f"bearer {session_token}"}
     params = {
         "course_id": course.id,
     }

-    course_detail = fetch_course_detail(course.slug)
+    course_detail = (
+        fetch_course_detail(course.slug) if isinstance(course, Course) else course
+    )

     downloaded_videos = defaultdict(int)
     for module_index, module in enumerate(course_detail.modules, start=1):
@@ -133,7 +144,11 @@ def download_course(
                 response = session.get(url, headers=headers, params=params)
                 response.raise_for_status()
                 lesson_video_url = response.json()["video_url"]
-                current_video_url = f"https://player.vimeo.com/video/{lesson_video_url.split('/')[-1]}"
+                # TODO: Maybe if in future KodeKloud change the video streaming service, this area will need some working.
+                # Try to generalize this for future enhacement?
+                current_video_url = (
+                    f"https://player.vimeo.com/video/{lesson_video_url.split('/')[-1]}"
+                )
                 if (
                     current_video_url in downloaded_videos
                     and downloaded_videos[current_video_url] > max_duplicate_count
@@ -178,11 +193,13 @@ def create_file_path(
     )


-def download_video_lesson(lesson_video_url, file_path: Path, cookie: str, quality: str) -> None:
+def download_video_lesson(
+    lesson_video_url, file_path: Path, cookie: str, quality: str
+) -> None:
     """
     Download a video lesson.

-    :param lesson: The lesson object
+    :param lesson_video_url: The lesson video URL
     :param file_path: The output file path for the video
     :param cookie: The user's authentication cookie
     :param quality: The video quality (e.g. "720p")
@@ -212,10 +229,11 @@ def download_resource_lesson(lesson_url, file_path: Path, cookie: str) -> None:
     """
     Download a resource lesson.

-    :param lesson: The lesson object
+    :param lesson_url: The lesson url
     :param file_path: The output file path for the resource
     :param cookie: The user's authentication cookie
     """
+    # TODO: Did we break this? I have no idea.
     page = requests.get(lesson_url)
     soup = BeautifulSoup(page.content, "html.parser")
     content = soup.find("div", class_="learndash_content_wrap")

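The new parse_course_from_url simply takes the last path segment of the URL as the course slug and hands it to fetch_course_detail. A quick illustration of the slug extraction alone, on a made-up URL:

url = "https://learn.kodekloud.com/courses/example-course/".strip("/")
course_slug = url.split("/")[-1]
print(course_slug)  # example-course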
src/kodekloud_downloader/models/course.py

+2 −1

@@ -1,6 +1,7 @@
-from pydantic import BaseModel, HttpUrl
 from typing import List, Optional

+from pydantic import BaseModel, HttpUrl
+

 class Category(BaseModel):
     id: str

src/kodekloud_downloader/models/courses.py

+2 −1

@@ -1,6 +1,7 @@
-from pydantic import BaseModel, HttpUrl
 from typing import List, Optional

+from pydantic import BaseModel, HttpUrl
+

 class Category(BaseModel):
     id: str

src/kodekloud_downloader/models/helper.py

+4 −3

@@ -1,8 +1,9 @@
-import requests
 from typing import List
+
+import requests
+
 from kodekloud_downloader.models.course import CourseDetail
-from kodekloud_downloader.models.courses import Course
-from kodekloud_downloader.models.courses import ApiResponse
+from kodekloud_downloader.models.courses import ApiResponse, Course


 def fetch_courses(page: int, limit: int) -> ApiResponse:
src/kodekloud_downloader/models/quiz.py (new file)

+47
@@ -0,0 +1,47 @@
+import concurrent.futures
+from dataclasses import dataclass
+from typing import Dict, List, Optional
+
+import requests
+
+
+@dataclass
+class QuizQuestion:
+    _id: Dict[str, str]
+    type: int
+    correctAnswers: List[str]
+    code: Dict[str, str]
+    question: str
+    answers: List[str]
+    labels: Optional[List[str]] = None
+    documentationLink: Optional[str] = None
+    explanation: Optional[str] = None
+    topic: Optional[str] = None
+
+
+@dataclass
+class Quiz:
+    _id: Dict[str, str]
+    questions: Dict[str, str]
+    name: Optional[str] = None
+    topic: Optional[str] = None
+    projectId: Optional[str] = None
+    order: Optional[str] = None
+
+    def fetch_questions(self) -> List[QuizQuestion]:
+        quiz_questions = []
+
+        def fetch_question(question_id):
+            params = {
+                "id": question_id,
+            }
+            url = "https://mcq-backend-main.kodekloud.com/api/questions/question"
+            response = requests.get(url, params=params)
+            response.raise_for_status()
+            if question_json := response.json():
+                quiz_questions.append(QuizQuestion(**question_json))
+
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            executor.map(fetch_question, self.questions.values())
+
+        return quiz_questions
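A hedged sketch of how the new Quiz model could be driven end to end, based only on the quizzes endpoint already used by download_quiz in main.py; the assumption that the /api/quizzes/all response is a list of quiz objects is mine, not confirmed by this diff:

import requests

from kodekloud_downloader.models.quiz import Quiz

response = requests.get("https://mcq-backend-main.kodekloud.com/api/quizzes/all")
response.raise_for_status()

quizzes = [Quiz(**item) for item in response.json()]  # assumed response shape
for quiz in quizzes[:1]:
    questions = quiz.fetch_questions()  # fetches each question in a thread pool
    print(quiz.name, len(questions))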
There was a problem loading the remainder of the diff.

0 commit comments
