Skip to content
This repository was archived by the owner on Dec 22, 2023. It is now read-only.

Commit d4c896e

Browse files
[ADD] Leetcode Scraper script
1 parent 8e18348 commit d4c896e

File tree

2 files changed

+83
-0
lines changed

2 files changed

+83
-0
lines changed
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# Author: Mahesh Bharadwaj K (https://github.com/MaheshBharadwaj)
2+
3+
import os
4+
import re
5+
import sys
6+
7+
from bs4 import BeautifulSoup
8+
from selenium import webdriver
9+
from textwrap3 import wrap
10+
from webdriver_manager.chrome import ChromeDriverManager
11+
12+
# Chrome options shared by every driver this script starts.
options = webdriver.ChromeOptions()
# Run Chrome headless so that no browser window opens while scraping.
options.add_argument("headless")
14+
15+
class InvalidCodeException(Exception):
    """
    Invalid problem code.

    Raised by the scraper when LeetCode answers with its 404 page
    instead of a problem page.
    """
20+
21+
22+
def parse_problem_statement(problem_code: str):
    """
    Scrape the statement of a LeetCode problem and return it as text.

    The problem page is opened in a Chrome driver configured by the
    module-level ``options`` and the resulting page source is parsed
    with BeautifulSoup.

    PARAMETERS:
    -----------
    problem_code: string
        LeetCode problem code, i.e. the part that follows
        https://leetcode.com/problems/ in the problem URL

    RETURNS:
    --------
    problem_div.text: string
        Extracted problem statement as string after removing HTML tags

    RAISES:
    -------
    InvalidCodeException
        If the 404 page is displayed for the given problem code
    RuntimeError
        If the page is not a 404 page but no problem statement
        element could be located in it
    """
    URL = f"https://leetcode.com/problems/{problem_code}"
    browser = webdriver.Chrome(ChromeDriverManager().install(), options=options)  # install and open chrome driver
    try:
        browser.get(URL)
        print("[SCRAPING] - ", problem_code)
        page_source = browser.page_source
    finally:
        # Always shut the driver down, even when loading the page fails;
        # otherwise each run leaves a browser process behind.
        browser.quit()

    soup = BeautifulSoup(page_source, features="html.parser")  # parse page source

    # If invalid problem code, 404 page is displayed
    if soup.find('div', class_='display-404'):
        raise InvalidCodeException

    # Problem statement div
    problem_div = soup.find('div', class_=re.compile(r'content\w+ question-content\w+'))
    if problem_div is None:
        # find() returns None when nothing matches; fail with a readable
        # message instead of an AttributeError on `.text`.
        raise RuntimeError(
            f"Could not locate the problem statement on the page for '{problem_code}'"
        )
    return problem_div.text
53+
54+
55+
def _write_wrapped(text, fout, width=80):
    """Write *text* to *fout* line by line, folding lines longer than *width*."""
    for line in text.split('\n'):
        if len(line) <= width:
            # Lines that already fit (including blank ones) are written
            # unchanged so the statement keeps its paragraph breaks.
            print(line, file=fout)
        else:
            # Splitting long line into multiple lines
            for piece in wrap(line, width=width):
                print(piece, file=fout)


if __name__ == "__main__":
    # Usage: python3 leet_code_scraper.py [problem code]
    # Writes the scraped statement to "<problem code>.txt" in the current
    # directory. Exits with status 1 on a usage error or a failed scrape.
    if len(sys.argv) != 2:
        print('Invalid Usage!\nRun: python3 leet_code_scraper.py [problem code]', file=sys.stderr)
        sys.exit(1)

    problem_code = sys.argv[1]
    output_name = problem_code + '.txt'
    try:
        # Scrape first: the output file is only created once there is
        # something to put in it.
        parsed_problem = parse_problem_statement(problem_code)

        # Explicit UTF-8 so the result does not depend on the platform's
        # default encoding.
        with open(output_name, 'wt', encoding='utf-8') as fout:
            _write_wrapped(parsed_problem, fout)

    except InvalidCodeException:
        print("Invalid Problem Code! Please check the problem code provided!", file=sys.stderr)
        sys.exit(1)

    except Exception as e:
        # Top-level boundary of the script: report the failure and signal
        # it through the exit status.
        print('Fatal: \n' + str(e), file=sys.stderr)
        sys.exit(1)

    else:
        print(f"Successfully scraped {problem_code} and saved as {output_name}!")
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
bs4==0.0.1
2+
selenium==3.141.0
3+
webdriver-manager==3.2.2
4+
textwrap3==0.9.2

0 commit comments

Comments
 (0)