Stepik-API/examples/save_course_slides.py at master · StepicOrg/Stepik-API · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# Run with Python3
# Saves all presentations from course in pdfs and arranges them into folders
# BeautifulSoup is required, just do:  pip install beautifulsoup4

import requests
import sys
import os
from bs4 import BeautifulSoup

client_id = "..."
client_secret = "..."
api_host = "https://stepik.org/"

auth = requests.auth.HTTPBasicAuth(client_id, client_secret)
response = requests.post('https://stepik.org/oauth2/token/',
                         data={'grant_type': 'client_credentials'},
                         auth=auth)
token = response.json().get('access_token', None)
if not token:
    print('Unable to authorize with provided credentials')
    exit(1)

course_id = 0

if len(sys.argv) == 2:
    course_id = sys.argv[1]
else:
    print("Error, enter course_id")
    exit(0)


# get 1 json object
def fetch_object(obj_class, obj_id):
    api_url = '{}/api/{}s/{}'.format(api_host, obj_class, obj_id)
    response = requests.get(api_url,
                            headers={'Authorization': 'Bearer ' + token}).json()
    return response['{}s'.format(obj_class)][0]


# get json-objects with ids in right order
def fetch_objects(obj_class, obj_ids, keep_order=True):
    objs = []
    # Fetch objects by 30 items,
    # so we won't bump into HTTP request length limits
    step_size = 30
    for i in range(0, len(obj_ids), step_size):
        obj_ids_slice = obj_ids[i:i + step_size]
        api_url = '{}/api/{}s?{}'.format(api_host, obj_class,
                                         '&'.join('ids[]={}'.format(obj_id)
                                                  for obj_id in obj_ids_slice))

        response = requests.get(api_url,headers={'Authorization': 'Bearer ' + token}).json()

        objs += response['{}s'.format(obj_class)]
    if (keep_order):
        return sorted(objs, key=lambda x: obj_ids.index(x['id']))
    return objs


# convert name of section into proper name folder
def replace_characters(text):
    text = text.replace(":", " -")
    text = text.replace("?", " ")
    return text


# download pdf
def download_file(link, path):
    slides = requests.get(link)
    file_name = link[link.rfind('/') + 1:]
    with open(os.path.join(path, file_name), "wb") as pdf:
        for chunk in slides.iter_content(chunk_size=128):
            pdf.write(chunk)


# find all links with slides
def find_slides(text, path):
    soup = BeautifulSoup(text, 'html.parser')
    for link in soup.find_all('a'):
        if link.get('href') and "slides" in link.get('href'):
            print("https://stepik.org" + link.get('href'))
            download_file("https://stepik.org" + link.get('href'), path)

course = fetch_object("course", course_id)
sections = fetch_objects("section", course['sections'])

title = course['title']
workload = course['workload']
summary = course['summary']

#create info dir
current_path = os.path.dirname(os.path.abspath(__file__))
current_path = os.path.join(current_path, title)

if not os.path.exists(current_path):
    os.makedirs(current_path)
    with open(os.path.join(current_path, "readme.html"), "w") as info_file:
        print(
            "<h1>" + title + "</h1>" + "<p><b>Нагрузка</b>: " + workload + "</p>" + "<b>Коротко о курсе</b>: " + summary,
            file=info_file)
else:
    print("folder already exists")

for section in sections:
    if not os.path.exists(os.path.join(current_path, replace_characters(section['title']))):
        os.makedirs(os.path.join(current_path, replace_characters(section['title'])))


    units_id = section['units']
    units = fetch_objects('unit', units_id)

    for unit in units:
        lesson = fetch_object('lesson', unit['lesson'])
        steps = fetch_objects('step', lesson['steps'])
        for step in steps:
            text = step['block']['text']
            path = os.path.join(current_path, section['title'])
            find_slides(text, path)