-
Notifications
You must be signed in to change notification settings - Fork 0
/
code.py
101 lines (96 loc) · 5.14 KB
/
code.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
from selenium import webdriver
import time
import pandas as pd
from selenium.webdriver.common.by import By
lis = []
def web_scrabing(num_iter , job):
jobs =[]
## declare driver_chrome
driver = webdriver.Chrome()
url = 'https://www.glassdoor.com/Job/jobs.htm?sc.keyword="' + job + '"&locT=C&locId=1147401&locKeyword=San%20Francisco,%20CA&jobType=all&fromAge=-1&minSalary=0&includeNoSalaryJobs=true&radius=100&cityId=-1&minRating=0.0&industryId=-1&sgocId=-1&seniorityType=all&companyId=-1&employerSizes=0&applicationType=0&remoteWorkType=0'
driver.get(url)
#load page
time.sleep(5)
#create Loop for load all Jobs from page
for i in range(num_iter):
#Test for the "Sign Up" prompt and get rid of it.
try:
driver.find_element(By.CLASS_NAME,'CloseButton').click()
except:
pass
time.sleep(1) # for cancel the sign up page
#Click to 'See More' button
try:
driver.find_element(By.CLASS_NAME , 'JobsList_buttonWrapper__haBp5').click()
except:
break
time.sleep(3) #very important step -> beacuse waite the new jops
try:
driver.find_element(By.CLASS_NAME,'CloseButton').click()
except:
pass
numbers = 0
all_jobs = driver.find_elements(By.CLASS_NAME , 'JobsList_jobListItem__JBBUV ')
for job in all_jobs:
try:
driver.find_element(By.CLASS_NAME,'CloseButton').click()
except:
pass
time.sleep(1)
# open each Job in the list
job.click()
time.sleep(1) # for wait the loading
try:
driver.find_element(By.CLASS_NAME,'CloseButton').click()
except:
pass
try:
driver.find_element(By.CLASS_NAME,'JobDetails_showMore__j5Z_h').click()
except:
continue
time.sleep(1)
ok_scrabing = False
count = 0
while count < 2 and ok_scrabing == False:
try:
company_name = driver.find_element(By.XPATH,'/html/body/div[2]/div[1]/div[3]/div[2]/div[2]/div[1]/header/div[1]/a/div[2]/span').text
location = driver.find_element(By.XPATH,'/html/body/div[2]/div[1]/div[3]/div[2]/div[2]/div[1]/header/div[1]/div[2]').text
job_title = driver.find_element(By.XPATH,'/html/body/div[2]/div[1]/div[3]/div[2]/div[2]/div[1]/header/div[1]/div[1]').text
job_description = driver.find_element(By.XPATH,'/html/body/div[2]/div[1]/div[3]/div[2]/div[2]/div[1]/section/div[1]/div[1]/div').text
try:
Average_salary_estimate = driver.find_element(By.XPATH,'/html/body/div[2]/div[1]/div[3]/div[2]/div[2]/div[1]/section/section[1]/div/div[2]/div[1]/div[1]').text
except NoSuchElementException:
Average_salary_estimate = "-1" #You need to set a "not found value. It's important."
try:
rating = driver.find_element(By.XPATH,'/html/body/div[2]/div[1]/div[3]/div[2]/div[2]/div[1]/section/section[3]/div/div/div/div/div').text
except NoSuchElementException:
rating = '-1' #You need to set a "not found value. It's important."
try:
industry = driver.find_element(By.XPATH,'/html/body/div[2]/div[1]/div[3]/div[2]/div[2]/div[1]/section/section[2]/div/div/div[4]').text
except NoSuchElementException:
industry = '-1'
try:
sector = driver.find_element(By.XPATH,'/html/body/div[2]/div[1]/div[3]/div[2]/div[2]/div[1]/section/section[2]/div/div/div[5]/div').text
except NoSuchElementException:
sector = '-1'
#lis.append({"Job Title" : job_title,"Job Description" : job_description,"Company Name" : company_name,"Location" : location})
# jobs.append({"Job Title" : job_title,"Salary Estimate" : Average_salary_estimate,"Rating" : rating,"Company Name" : company_name,
# "Location" : location,
# "Industry" : industry,
# "Sector" : sector,"Job Description" : job_description})
lis.append({"Job Title" : job_title,"Salary Estimate" : Average_salary_estimate,"Rating" : rating,"Company Name" : company_name,
"Location" : location,
"Industry" : industry,
"Sector" : sector,"Job Description" : job_description})
numbers = numbers +1
print({"# --> ":numbers,"Job Title" : job_title,"Salary Estimate" : Average_salary_estimate,"Rating" : rating,"Company Name" : company_name,
"Location" : location,
"Industry" : industry,
"Sector" : sector,"Job Description" : job_description})
ok_scrabing = True
except:
time.sleep(1)
count = count +1
if __name__ == '__main__':
web_scrabing(100,'devops')