Skip to content

Commit 248976f

Browse files
committed
LeadGeneration
1 parent 3ef36e0 commit 248976f

File tree

1 file changed

+126
-61
lines changed

1 file changed

+126
-61
lines changed

navigate/navigateDomain.py

Lines changed: 126 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -27,108 +27,173 @@
2727

2828
# `name` attribute values probed (via By.NAME) to decide whether a page holds
# a contact form, and the fields that get filled once a form is found.
form_fields = ['firstname', 'lastname', 'name', 'email', 'company', 'company_website', 'phone', 'contact_reason', 'message']
2929

30+
# `name` attribute values that count as a phone-number input; a form without
# any of these is rejected by FindForm.
phone_fields = ['phone', 'your-phone']
31+
32+
def FillForm(driver):
    """Type a placeholder value into every known form field that is present.

    Each name in the module-level ``form_fields`` list is looked up with
    ``By.NAME`` in the driver's current browsing context and, when found,
    receives the text ``'John Doe'``. Fields that are missing or cannot be
    typed into are skipped.

    Args:
        driver: Selenium WebDriver already positioned on the page (or frame)
            that contains the form.
    """
    for field in form_fields:
        try:
            # find_element raises when nothing matches, so a successful
            # return needs no extra truthiness check.
            form_element = driver.find_element(By.NAME, field)
            form_element.send_keys('John Doe')
        except Exception:
            # Best effort: an absent or non-interactable field is simply
            # skipped. `Exception` (rather than a bare except) lets
            # KeyboardInterrupt / SystemExit still stop the script.
            pass
40+
41+
def ExtractData():
    """Placeholder for the data-extraction step; currently does nothing."""
    return None
43+
3044
def FindForm(driver):
    """Check whether the current browsing context holds a usable contact form.

    A form is considered usable when:
      * at least one name from ``form_fields`` is present,
      * at least one name from ``phone_fields`` is present, and
      * no element with class ``grecaptcha-logo`` is found.

    Args:
        driver: Selenium WebDriver positioned on the page or frame to inspect.

    Returns:
        int: 1 when a usable form was found, otherwise 0. An int (not a bool)
        is kept so callers can continue to combine results with ``|=``.
    """

    def _present(by, value):
        # True when the lookup succeeds; any lookup failure counts as absent.
        try:
            driver.find_element(by, value)
        except Exception:
            return False
        return True

    has_form = any(_present(By.NAME, name) for name in form_fields)
    has_phone = any(_present(By.NAME, name) for name in phone_fields)

    if has_form and not has_phone:
        print('no phone field.')

    # A detected captcha vetoes the form regardless of the checks above.
    if _present(By.CLASS_NAME, 'grecaptcha-logo'):
        print('captcha detected')
        return 0

    return 1 if (has_form and has_phone) else 0
5882

59-
def FormExist(url): # check whether form exist in the page.
83+
def LeadGeneration(driver):
    """Find a contact form on the current page, or in one of its iframes, and fill it.

    The top-level document is checked first. Only when it has no usable form
    are the page's ``<iframe>`` elements visited in order; the search stops at
    the first frame whose form is accepted by ``FindForm``.

    Args:
        driver: Selenium WebDriver already navigated to the page to process.

    Returns:
        The truthy/falsy result of ``FindForm`` for the location that was
        filled (1), or 0 when neither the page nor any iframe had a usable form.
    """
    form_exist = FindForm(driver)

    if form_exist:
        FillForm(driver)
        ExtractData()
    else:
        frame_count = len(driver.find_elements(By.TAG_NAME, 'iframe'))
        for index in range(frame_count):
            # Frames are addressed by index, as in the original code.
            driver.switch_to.frame(index)
            try:
                form_exist = FindForm(driver)
                if form_exist:
                    FillForm(driver)
                    ExtractData()
            finally:
                # Always return to the top-level document, even if a helper
                # raised, so later lookups are not left scoped to this frame.
                driver.switch_to.default_content()

            if form_exist:
                break

    if form_exist:
        print('form filled')
    else:
        print('this page was skipped')
    return form_exist
87116

88-
def navigate_domain(url):
117+
def NavigateDomain(url):
    """Open ``url`` in Chrome and try to locate and fill a contact form.

    Steps:
      1. Load the page and click an "Accept" button if one is found.
      2. Run ``LeadGeneration`` on the landing page.
      3. If no form was filled, follow links whose text contains each entry
         of the module-level ``anchor_words`` (first links wrapping a
         ``<span>``, then plain links), running ``LeadGeneration`` on each
         target and navigating back afterwards. Stops at the first success.

    The browser is always closed before returning, including on errors.

    Args:
        url: Address of the site's landing page.
    """
    chrome_options = webdriver.ChromeOptions()
    driver = webdriver.Chrome(options=chrome_options)

    try:
        driver.get(url)

        # Presumably a cookie/consent banner; absence is not an error.
        try:
            accept_link = driver.find_element(
                By.XPATH, "//button[contains(text(), 'Accept')]"
            )
            print('accept button clicked')
            accept_link.click()
        except Exception:
            pass

        form_filled = LeadGeneration(driver)

        if not form_filled:
            # Link shapes to try for each word: text inside a <span>, then
            # text directly inside the <a>.
            link_templates = (
                "//a[.//span[contains(text(), '{word}')]]",
                "//a[contains(text(), '{word}')]",
            )
            for word in anchor_words:
                print(word)
                for template in link_templates:
                    try:
                        anchor_link = driver.find_element(
                            By.XPATH, template.format(word=word)
                        )
                        print('anchor: ' + word)
                        anchor_link.click()
                        form_filled = LeadGeneration(driver)
                        driver.back()
                    except Exception:
                        # Link missing or not clickable: try the next shape.
                        pass

                    # Stop as soon as a form has been filled so a later
                    # attempt cannot overwrite the successful result.
                    if form_filled:
                        break
                if form_filled:
                    break

        # Presumably keeps the window open briefly so the outcome can be
        # observed before the browser closes.
        time.sleep(5)
    finally:
        # Release the browser process even when navigation or lookup raised.
        driver.quit()
119170

120171

121172

122-
# navigate_domain('https://chassi.com/')
123-
# navigate_domain('https://parkstreet.com/')
124-
FormExist('https://chassi.com/contact/')
173+
# NavigateDomain('https://adstage.io/')
174+
# NavigateDomain('https://capeanalytics.com/')
175+
# NavigateDomain('https://dotloop.com/')
176+
# NavigateDomain('https://homelight.com/')
177+
# NavigateDomain('https://indinero.com/')
178+
# NavigateDomain('https://jyve.com/')
179+
# NavigateDomain('https://parkstreet.com/')
180+
# NavigateDomain('https://goguardian.com/')
181+
# NavigateDomain('https://claylacy.com/')
182+
# NavigateDomain('https://marketshareonline.com/')
183+
# Script entry: runs the form search against a single hard-coded site
# whenever this module is executed or imported.
NavigateDomain('https://intrepidib.com/')
184+
125185

126186
'''
127187
128188
parkstreet.com // success
129189
chassi.com/contact // frame, no-phone
130190
https://www.claylacy.com/contact-us/ // success
131191
https://info.marketshareonline.com/contact // captcha
192+
https://intrepidib.com/contact-us/ // your-phone
193+
https://www.alpertandalpert.com/contact-us.html // no phone
194+
https://yscouts.com/contact/ // no phone
195+
https://valorglobal.com/get-a-quote/ // captcha
196+
https://myfw.com/contact/ // captcha
132197
133198
firstname
134199
lastname

0 commit comments

Comments
 (0)