Skip to content

Commit 429d57f

Browse files
committed
Save multiple ac code
1 parent 09f0ef0 commit 429d57f

File tree

1 file changed

+76
-64
lines changed

1 file changed

+76
-64
lines changed

LeetCode_AC_Code_Crawler.py

Lines changed: 76 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from selenium.webdriver.support import expected_conditions as EC
55
from selenium.webdriver.common.by import By
66
from selenium.common.exceptions import TimeoutException
7+
from selenium.webdriver.common.keys import Keys
78
import json
89
import time
910
import re
@@ -15,6 +16,8 @@
1516
outputDir = ""
1617
driver = ""
1718

19+
timeout = 5 # seconds
20+
1821
suffix = {"cpp": "cpp", "cplusplus": "cpp", "c++": "cpp", "c": "c",
1922
"java": "java", "python": "py", "py": "py", "c#": "cs",
2023
"csharp": "cs", "javascript": "js", "js": "js", "ruby": "rb",
@@ -46,64 +49,58 @@ def save_ac_code(ac_list, premium):
4649

4750
url = ac["url"]
4851
difficulty = level[ac["difficulty"]]
49-
ac_submission = ""
52+
ac_submission_list = ""
5053

51-
while len(ac_submission) == 0:
54+
while len(ac_submission_list) == 0:
5255
driver.get(url)
5356
time.sleep(1)
5457
soup = BeautifulSoup(driver.page_source, "html.parser")
55-
ac_submission = soup.find_all("a", text="Accepted")
56-
print ac_submission[0]
57-
58-
driver.get("https://leetcode.com" + ac_submission[0]["href"])
59-
soup = BeautifulSoup(driver.page_source, "html.parser")
60-
61-
#get submission details
62-
testcase = soup.find("span", id = "result_progress").text
63-
runtime = soup.find("span", id = "result_runtime").text
64-
ranking = soup.find("div", style="line-height: 1em; position: relative;")
65-
if ranking:
66-
ranking = ranking.text.replace("\n", " ").replace(" ", "")[15:]
67-
else:
68-
ranking = "Your runtime beats 00.00 % of cpp submissions."
69-
submission_detail = ("/*\n" + "Submission Detail:{" +
70-
"\n Difficulty : " + difficulty +
71-
"\n Acceptance Rate : " + str(ac["ac_rate"]*100)[:5] + " %" +
72-
"\n Runtime : " + runtime +
73-
"\n Testcase : " + testcase + " passed" +
74-
"\n Ranking : " + ranking +
75-
"\n}\n*/\n\n")
76-
77-
#get ac code
78-
script = soup.find("script", text = re.compile("submissionCode:"))
79-
code = re.findall("submissionCode:\s*'(.+)'", script.string)[0].decode("unicode-escape")
80-
suff = suffix_conversion(re.findall("getLangDisplay:\s*'(.+)'", script.string)[0])
81-
82-
id = str(ac["id"])
83-
if len(id) < 2:
84-
id = "00" + id
85-
elif len(id) < 3:
86-
id = "0" + id
87-
88-
folderName = id + ". " + ac["title"].strip()
89-
if not os.path.exists(outputDir + "\\" + folderName):
90-
os.makedirs(outputDir + "\\" + folderName)
91-
92-
completeName = os.path.join(
93-
outputDir + "\\" + folderName, "{}.{}".format("Solution", suff))
94-
sys.stdout.write(" "*60 + "\r")
95-
if not os.path.exists(completeName):
96-
print(folderName + " saved.")
97-
file = open(completeName, "w")
98-
file.write(submission_detail+code)
99-
file.close()
100-
elif os.path.exists(completeName) and overWrite.lower() == "yes":
101-
print(folderName + " over-written.")
102-
file = open(completeName, "w")
103-
file.write(submission_detail+code)
104-
file.close()
105-
elif os.path.exists(completeName) and overWrite.lower() == "no":
106-
print(folderName + " skipped.")
58+
ac_submission_list = soup.find_all("a", text="Accepted")
59+
60+
for i in range(len(ac_submission_list)):
61+
driver.get("https://leetcode.com" + ac_submission_list[i]["href"])
62+
soup = BeautifulSoup(driver.page_source, "html.parser")
63+
64+
# Get submission details
65+
testcase = soup.find("span", id = "result_progress").text
66+
runtime = soup.find("span", id = "result_runtime").text
67+
ranking = soup.find("div", style="line-height: 1em; position: relative;")
68+
if ranking:
69+
ranking = ranking.text.replace("\n", " ").replace(" ", "")[15:]
70+
else:
71+
ranking = "Your runtime beats 00.00 % of submissions."
72+
submission_detail = ("/*\n" + "Submission Detail:{" +
73+
"\n Difficulty : " + difficulty +
74+
"\n Acceptance Rate : " + str(ac["ac_rate"]*100)[:5] + " %" +
75+
"\n Runtime : " + runtime +
76+
"\n Testcase : " + testcase + " passed" +
77+
"\n Ranking : " + ranking +
78+
"\n}\n*/\n\n")
79+
80+
# Get ac code
81+
script = soup.find("script", text = re.compile("submissionCode:"))
82+
code = re.findall("submissionCode:\s*'(.+)'", script.string)[0].decode("unicode-escape")
83+
suff = suffix_conversion(re.findall("getLangDisplay:\s*'(.+)'", script.string)[0])
84+
85+
folderName = str(ac["id"]).zfill(4) + ". " + ac["title"].strip()
86+
if not os.path.exists(outputDir + "\\" + folderName):
87+
os.makedirs(outputDir + "\\" + folderName)
88+
89+
completeName = os.path.join(
90+
outputDir + "\\" + folderName, "{}.{}".format("Solution" + str(i).zfill(2), suff))
91+
sys.stdout.write(" "*60 + "\r")
92+
if not os.path.exists(completeName):
93+
print(folderName + " saved.")
94+
file = open(completeName, "w")
95+
file.write(submission_detail+code)
96+
file.close()
97+
elif os.path.exists(completeName) and overWrite.lower() == "yes":
98+
print(folderName + " over-written.")
99+
file = open(completeName, "w")
100+
file.write(submission_detail+code)
101+
file.close()
102+
elif os.path.exists(completeName) and overWrite.lower() == "no":
103+
print(folderName + " skipped.")
107104

108105
processed_nums += 1
109106

@@ -154,15 +151,26 @@ def login():
154151
usernameField.send_keys(username)
155152
passwordField.send_keys(password)
156153

157-
time.sleep(1)
158-
driver.find_element_by_id("signin_btn").click()
159-
160-
delay = 5 # seconds
161-
try:
162-
myElem = WebDriverWait(driver, delay).until(EC.presence_of_element_located((By.ID, "home-app")))
163-
#print "Page is ready!"
164-
except TimeoutException:
165-
print "Loading took too much time!"
154+
while True:
155+
try:
156+
WebDriverWait(driver, timeout).until(
157+
EC.presence_of_element_located((By.ID, "signin_btn")))
158+
driver.find_element_by_id("signin_btn").click()
159+
break
160+
except:
161+
print("Unexpected error: " + str(sys.exc_info()[0]))
162+
time.sleep(1)
163+
164+
while True:
165+
try:
166+
WebDriverWait(driver, timeout).until(
167+
EC.presence_of_element_located((By.ID, "home-app")))
168+
break
169+
except TimeoutException:
170+
print("Loading took too much time!")
171+
else:
172+
print("Username or password is empty")
173+
sys.exit(1)
166174

167175
def premium_account_check():
168176
soup = BeautifulSoup(driver.page_source, "html.parser")
@@ -183,8 +191,12 @@ def suffix_conversion(suff="cpp"):
183191
password = conf["Password"]
184192
outputDir = conf["OutputDir"]
185193
driverPath = conf["ChromedriverPath"]
186-
driver = webdriver.Chrome(driverPath)
187194

195+
if not os.path.isdir(outputDir):
196+
print("Output directory not found")
197+
sys.exit(1)
198+
199+
driver = webdriver.Chrome(driverPath)
188200
login()
189201
premium = premium_account_check()
190202
ac_list = get_ac_problem_list()

0 commit comments

Comments
 (0)