Skip to content

Commit 38ec813

Browse files
authored
Add files via upload
1 parent 701991d commit 38ec813

File tree

3 files changed

+149
-0
lines changed

3 files changed

+149
-0
lines changed
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Slideshare to PDF
2+
3+
## Aim
4+
5+
The main aim of this script is to generate a PDF file from a SlideShare presentation link.
6+
7+
## Purpose
8+
9+
To download a presentation from slideshare we have to sign up to scribd, but with this script you can download any presentation from the slideshare without any signup, provided that you have a good internet connection.
10+
11+
## Short description of package/script
12+
13+
The script uses the PIL, requests and beautifulsoup modules to scrape the slide images from the presentation and then convert them into a single PDF file.
14+
The success rate of the script depends on the speed of your internet connection, so for the best results try connecting to a good network.
15+
16+
## Setup instructions
17+
18+
To install the dependencies
19+
20+
`pip install -r requirements.txt`
21+
22+
To try the script
23+
24+
`python main.py`
25+
26+
- A GUI will open up, enter your link, press _Get PDF_
27+
- Wait until it shows that the file was downloaded successfully at the bottom of the GUI window.
28+
- After the success message, your file should be in the current working directory.
29+
30+
## Output
31+
32+
### Main Window:
33+
34+
![](./Images/window.png)
35+
<br/>
36+
37+
### Downloaded file:
38+
39+
![](./Images/Output.png)
40+
41+
## Author(s)
42+
43+
[Vivek](https://github.com/vivekthedev)
44+
45+
Questions? [Twitter](https://twitter.com/vivekthedev)

GUIScripts/Slideshare to PDF/main.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Built in modules
2+
import io
3+
from tkinter import *
4+
import pathlib
5+
6+
# External Modules
7+
import requests
8+
from bs4 import BeautifulSoup
9+
import PIL.Image
10+
import validators
11+
12+
# Get Current Working Directory path
13+
# Absolute form of the working directory, captured once when the module loads
path = pathlib.Path(".").resolve()
14+
15+
16+
def get_pdf():
    """Turn the SlideShare link typed into the GUI into a single PDF file.

    Reads the link from ``url_var``, fetches the page, collects every
    ``<img class="slide-image">`` tag, downloads the last candidate listed in
    each tag's ``srcset`` and writes all slides, in page order, to
    ``<page title>.pdf`` inside ``path``.

    Nothing is returned; the outcome (success or the reason for failure) is
    shown to the user through ``info_label_2``. If any slide cannot be
    downloaded or decoded, no PDF is written, so a file reported as
    downloaded always contains every slide found on the page.
    """
    URL = url_var.get().strip()

    # Only perform scraping if the url is valid.
    if not validators.url(URL):
        info_label_2.configure(text="Please provide a valid link")
        return

    # A timeout keeps the (single-threaded) GUI from hanging forever on a
    # dead connection.
    timeout_seconds = 30

    try:
        r = requests.get(URL, timeout=timeout_seconds)
        r.raise_for_status()
    except requests.RequestException:
        info_label_2.configure(text="Some Connection ERROR")
        return

    soup = BeautifulSoup(r.content, "html5lib")

    # Strip out preferred image resolution from the srcset of the img tag:
    # take the last comma-separated candidate, drop its width descriptor and
    # any query string. Tags without a srcset are skipped instead of crashing.
    imgSRC = []
    for tag in soup.find_all("img", class_="slide-image"):
        srcset = tag.get("srcset")
        if srcset:
            imgSRC.append(
                srcset.split(",")[-1].strip().split(" ")[0].split("?")[0]
            )

    if not imgSRC:
        info_label_2.configure(text="No slides found on this page")
        return

    # List to store all the image objects
    imagesJPG = []

    for link in imgSRC:
        try:
            im = requests.get(link, timeout=timeout_seconds)
            im.raise_for_status()

            # Keep the image in memory (BytesIO) so nothing has to be written
            # to disk before the PDF is assembled.
            imgJPG = PIL.Image.open(io.BytesIO(im.content))
            # Force decoding now so corrupt data is caught here, not in save().
            imgJPG.load()
        except (requests.RequestException, OSError):
            # OSError also covers PIL's "cannot identify image file" error.
            info_label_2.configure(text="Some Connection ERROR")
            return

        # Pillow's PDF writer rejects modes with an alpha channel.
        if imgJPG.mode not in ("RGB", "L"):
            imgJPG = imgJPG.convert("RGB")
        imagesJPG.append(imgJPG)

    # Build a file name from the page title, replacing characters that are
    # not allowed in file names on common platforms.
    title = ""
    if soup.title is not None and soup.title.string:
        title = str(soup.title.string)
    safe_title = "".join(
        "_" if ch in '\\/:*?"<>|' or ord(ch) < 32 else ch for ch in title
    ).strip(" .")
    target = path / f"{safe_title or 'slideshare'}.pdf"

    # Appending all the images object after the first image and exporting it as a PDF.
    try:
        imagesJPG[0].save(target, save_all=True, append_images=imagesJPG[1:])
    except OSError:
        info_label_2.configure(text="Could not write the PDF file")
        return

    info_label_2.configure(text=f"File Downloaded to\n{path}")
61+
62+
63+
# Fixed-size 300x300 main window
base = Tk()
base.title("Slideshare to PDF")
base.geometry("300x300")
base.resizable(False, False)
base.configure(background="aliceblue")

# Entry field bound to url_var; get_pdf() reads the link from this variable
url_var = StringVar()
val_entry = Entry(base, textvariable=url_var, width="30")
val_entry.place(x=50, y=50)

# Button that starts the download
button = Button(
    base,
    text="Get PDF",
    command=get_pdf,
    width="25",
    height="2",
    bg="grey",
)
button.place(x=50, y=100)

# Static hint shown under the button
info_label = Label(
    base,
    text="Enter the presentaion link\nMake sure to have a good internet connection.",
)
info_label.place(x=35, y=200)

# Status line: get_pdf() writes its error and success messages here
info_label_2 = Label(base, text="")
info_label_2.place(x=50, y=250)

# Hand control to Tk's event loop
base.mainloop()
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
beautifulsoup4==4.10.0
2+
bs4==0.0.1
3+
certifi==2021.10.8
4+
charset-normalizer==2.0.10
5+
decorator==5.1.1
6+
html5lib==1.1
7+
idna==3.3
8+
Pillow==9.0.0
9+
requests==2.27.1
10+
six==1.16.0
11+
soupsieve==2.3.1
12+
urllib3==1.26.8
13+
validators==0.18.2
14+
webencodings==0.5.1

0 commit comments

Comments
 (0)