Skip to content

Commit edefb3d

Browse files
committed
Update CDP Mode
1 parent 6bbca0f commit edefb3d

File tree

5 files changed

+138
-23
lines changed

5 files changed

+138
-23
lines changed

examples/cdp_mode/ReadMe.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,7 @@ await tab.get_current_url()
692692
await tab.send_keys(selector, text, timeout=5)
693693
await tab.type(selector, text, timeout=5)
694694
await tab.click(selector, timeout=5)
695+
await tab.click_if_visible(selector, timeout=0)
695696
await tab.click_with_offset(selector, x, y, center=False, timeout=5)
696697
await tab.solve_captcha()
697698
await tab.click_captcha() # Same as solve_captcha()

help_docs/cdp_mode_methods.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,7 @@ await tab.get_current_url()
326326
await tab.send_keys(selector, text, timeout=5)
327327
await tab.type(selector, text, timeout=5)
328328
await tab.click(selector, timeout=5)
329+
await tab.click_if_visible(selector, timeout=0)
329330
await tab.click_with_offset(selector, x, y, center=False, timeout=5)
330331
await tab.solve_captcha()
331332
await tab.click_captcha() # Same as solve_captcha()

seleniumbase/core/sb_cdp.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1995,7 +1995,7 @@ def _on_a_g_recaptcha_page(self, *args, **kwargs):
19951995
time.sleep(0.1)
19961996
return True
19971997
elif "com/recaptcha/api.js" in source:
1998-
time.sleep(1.6) # Still loading
1998+
time.sleep(1.2) # Maybe still loading
19991999
try:
20002000
self.loop.run_until_complete(self.page.wait(0.1))
20012001
except Exception:
@@ -2084,10 +2084,10 @@ def __cdp_click_incapsula_hcaptcha(self):
20842084
with suppress(Exception):
20852085
element.click_with_offset(x_offset, y_offset)
20862086
was_clicked = True
2087-
time.sleep(0.056)
2087+
time.sleep(0.075)
20882088
if was_clicked:
20892089
# Wait a moment for the click to succeed
2090-
time.sleep(0.25)
2090+
time.sleep(0.75)
20912091
self.__slow_mode_pause_if_set()
20922092
self.loop.run_until_complete(self.page.wait())
20932093
if "--debug" in sys.argv:

seleniumbase/undetected/cdp_driver/browser.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,12 @@ def get_rd_port(self):
261261
return self.config.port
262262

263263
def get_rd_url(self):
    """Build and return the remote-debugging URL for this browser.

    The URL is assembled from ``self.config.host`` and
    ``self.config.port``, and is what lets the Playwright integration
    launch stealthy.

    Side effect: the ``NODE_NO_WARNINGS`` environment variable is set
    to "1" on every call, which hides this warning:
    Deprecation: "url.parse() behavior is not standardized".
    (github.com/microsoft/playwright-python/issues/3016)"""
    os.environ["NODE_NO_WARNINGS"] = "1"
    config = self.config
    return f"http://{config.host}:{config.port}"

seleniumbase/undetected/cdp_driver/tab.py

Lines changed: 127 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@
55
import logging
66
import pathlib
77
import re
8+
import sys
89
import urllib.parse
910
import warnings
1011
from contextlib import suppress
1112
from filelock import FileLock
1213
from seleniumbase import config as sb_config
1314
from seleniumbase.fixtures import constants
1415
from seleniumbase.fixtures import js_utils
16+
from seleniumbase.fixtures import page_utils
1517
from seleniumbase.fixtures import shared_utils
1618
from typing import Dict, List, Union, Optional, Tuple
1719
from . import browser as cdp_browser
@@ -1339,16 +1341,29 @@ async def is_element_present(self, selector):
13391341
return False
13401342

13411343
async def is_element_visible(self, selector):
    """Return True if the element for ``selector`` is considered visible.

    Plain selectors: the element is looked up with ``self.select()``
    and counts as visible when its position reports a non-zero width
    or a non-zero height.

    ``tag:contains("text")`` selectors: the tag before ``:contains(``
    must be found by ``self.select()`` and the text must be found by
    ``self.find_element_by_text()``. No size check is made here.

    Any lookup failure is reported as False instead of being raised.
    """
    marker = ":contains("
    if marker not in selector:
        try:
            element = await self.select(selector, timeout=0.01)
        except Exception:
            return False
        if not element:
            return False
        try:
            position = await element.get_position_async()
            return (position.width != 0 or position.height != 0)
        except Exception:
            return False
    with suppress(Exception):
        prefix, _, remainder = selector.partition(marker)
        # Only the last space-separated token before ":contains(" is used.
        tag_name = prefix.split(" ")[-1]
        # Find the closing quote+paren pair, so that text which itself
        # contains ")" is not cut short at the first parenthesis.
        quote = remainder[:1]
        end = -1
        if quote in ("'", '"'):
            end = remainder.find(quote + ")", 1)
        if end != -1:
            text = remainder[1:end]
        else:
            # Unquoted (or unterminated) text: take everything up to the
            # first ")" and drop stray quotes, without eating real chars.
            text = remainder.split(")")[0].strip("'\"")
        if not await self.select(tag_name, timeout=0.01):
            return False
        if not await self.find_element_by_text(text):
            return False
        return True
    return False
13531368

13541369
async def __on_a_cf_turnstile_page(self, source=None):
@@ -1369,6 +1384,11 @@ async def __on_a_cf_turnstile_page(self, source=None):
13691384
return True
13701385
return False
13711386

1387+
async def __on_an_incapsula_hcaptcha_page(self, *args, **kwargs):
    """Report whether the Incapsula hCaptcha iframe is visible.

    Extra positional and keyword arguments are accepted but unused.
    Returns True when ``self.is_element_visible()`` finds the iframe
    whose ``src`` contains "Incapsula_Resource?", otherwise False.
    """
    incapsula_iframe = 'iframe[src*="Incapsula_Resource?"]'
    iframe_is_shown = await self.is_element_visible(incapsula_iframe)
    return True if iframe_is_shown else False
1391+
13721392
async def __on_a_g_recaptcha_page(self, *args, **kwargs):
13731393
await self.sleep(0.4) # reCAPTCHA may need a moment to appear
13741394
source = await self.get_html()
@@ -1382,7 +1402,7 @@ async def __on_a_g_recaptcha_page(self, *args, **kwargs):
13821402
await self.sleep(0.1)
13831403
return True
13841404
elif "com/recaptcha/api.js" in source:
1385-
await self.sleep(1.6) # Still loading
1405+
await self.sleep(1.2) # Maybe still loading
13861406
return True
13871407
return False
13881408

@@ -1391,18 +1411,34 @@ async def __gui_click_recaptcha(self):
13911411
if await self.is_element_present('iframe[title="reCAPTCHA"]'):
13921412
selector = 'iframe[title="reCAPTCHA"]'
13931413
else:
1394-
return
1414+
return False
13951415
await self.sleep(0.5)
13961416
with suppress(Exception):
1397-
element_rect = await self.get_gui_element_rect(selector, timeout=1)
1417+
element_rect = await self.get_element_rect(selector, timeout=0.1)
13981418
e_x = element_rect["x"]
13991419
e_y = element_rect["y"]
1420+
window_rect = await self.get_window_rect()
1421+
win_width = window_rect["innerWidth"]
1422+
win_height = window_rect["innerHeight"]
1423+
if (
1424+
e_x > 1040
1425+
and e_y > 640
1426+
and abs(win_width - e_x) < 110
1427+
and abs(win_height - e_y) < 110
1428+
):
1429+
# Probably the invisible reCAPTCHA in the bottom right corner
1430+
return False
1431+
gui_element_rect = await self.get_gui_element_rect(
1432+
selector, timeout=1
1433+
)
1434+
gui_e_x = gui_element_rect["x"]
1435+
gui_e_y = gui_element_rect["y"]
14001436
x_offset = 26
14011437
y_offset = 35
14021438
if await asyncio.to_thread(shared_utils.is_windows):
14031439
x_offset = 29
1404-
x = e_x + x_offset
1405-
y = e_y + y_offset
1440+
x = gui_e_x + x_offset
1441+
y = gui_e_y + y_offset
14061442
sb_config._saved_cf_x_y = (x, y) # For debugging later
14071443
await self.sleep(0.11)
14081444
gui_lock = FileLock(constants.MultiBrowser.PYAUTOGUILOCK)
@@ -1413,6 +1449,53 @@ async def __gui_click_recaptcha(self):
14131449
selector, x_offset, y_offset, timeout=1
14141450
)
14151451
await self.sleep(0.22)
1452+
return True
1453+
return False
1454+
1455+
async def __cdp_click_incapsula_hcaptcha(self):
    """Click the hCaptcha checkbox nested inside an Incapsula iframe.

    The outer Incapsula iframe must be visible, and the hCaptcha widget
    iframe must be found inside it. The widget is then clicked at a
    fixed offset from its top-left corner (``center=False``).

    Returns True if the click call completed without raising, and
    False if either iframe could not be found or the click failed.
    With "--debug" in ``sys.argv``, progress messages are printed.
    """
    outer_selector = 'iframe[src*="Incapsula_Resource?"]'
    selector = "iframe[data-hcaptcha-widget-id]"
    if not await self.is_element_visible(outer_selector):
        return False
    outer_element = await self.find_element_by_text(outer_selector)
    if not outer_element:
        # The iframe can vanish between the visibility check and the
        # lookup; bail out instead of failing on a missing element.
        return False
    element = await outer_element.query_selector_async(selector)
    if not element:
        return False
    await self.sleep(0.55)
    x_offset = 30
    y_offset = 36
    was_clicked = False
    debug_mode = "--debug" in sys.argv
    gui_lock = FileLock(constants.MultiBrowser.PYAUTOGUILOCK)
    with gui_lock:  # Prevent issues with multiple processes
        await self.bring_to_front()
        await self.sleep(0.056)
        if debug_mode:
            # Pick a quote style that does not clash with the selector
            displayed_selector = "`%s`" % selector
            if '"' not in selector:
                displayed_selector = '"%s"' % selector
            elif "'" not in selector:
                displayed_selector = "'%s'" % selector
            print(
                " <DEBUG> click_with_offset(%s, %s, %s)"
                % (displayed_selector, x_offset, y_offset)
            )
        with suppress(Exception):
            await element.mouse_click_with_offset_async(
                x=x_offset, y=y_offset, center=False
            )
            was_clicked = True
            await self.sleep(0.075)
    if was_clicked:
        # Wait a moment for the click to succeed
        await self.sleep(0.75)
        if debug_mode:
            print(" <DEBUG> hCaptcha was clicked!")
        return True
    if debug_mode:
        print(" <DEBUG> hCaptcha was NOT clicked!")
    return False
14161499

14171500
async def get_element_rect(self, selector, timeout=5):
14181501
element = await self.select(selector, timeout=timeout)
@@ -1505,6 +1588,25 @@ async def click(self, selector, timeout=5):
15051588
element = await self.find(selector, timeout=timeout)
15061589
await element.click_async()
15071590

1591+
async def click_if_visible(self, selector, timeout=0):
    """Click the element for ``selector`` only if it is visible.

    ``selector``: a selector string. If it includes ``:contains(``, it
        is passed through ``page_utils.recalculate_selector()`` to get
        the selector used by ``self.find()``. Visibility is always
        checked with the selector exactly as the caller gave it.
    ``timeout``: 0 (the default) means check once and return at once
        if the element is not visible. Otherwise, wait up to this many
        seconds for the element via ``self.find()`` and check again.

    Returns None. Errors raised while finding or clicking the element
    are suppressed, so a missing element is a silent no-op.
    """
    original_selector = selector
    if ":contains(" in selector:
        selector, _ = page_utils.recalculate_selector(
            selector, by="css selector", xp_ok=True
        )
    if await self.is_element_visible(original_selector):
        with suppress(Exception):
            element = await self.find(selector, timeout=0.01)
            await element.click_async()
        return
    if timeout == 0:
        return
    with suppress(Exception):
        await self.find(selector, timeout=timeout)
        # Re-check with the caller's selector: the recalculated one may
        # be in a form that is_element_visible() does not handle.
        if await self.is_element_visible(original_selector):
            element = await self.find(selector, timeout=0.01)
            await element.click_async()
1609+
15081610
async def click_with_offset(self, selector, x, y, center=False, timeout=5):
15091611
element = await self.find(selector, timeout=timeout)
15101612
await element.scroll_into_view_async()
@@ -1516,10 +1618,13 @@ async def solve_captcha(self):
15161618
if await self.__on_a_cf_turnstile_page(source):
15171619
pass
15181620
elif await self.__on_a_g_recaptcha_page(source):
1519-
await self.__gui_click_recaptcha()
1520-
return
1621+
result = await self.__gui_click_recaptcha()
1622+
return result
1623+
elif await self.__on_an_incapsula_hcaptcha_page():
1624+
result = await self.__cdp_click_incapsula_hcaptcha()
1625+
return result
15211626
else:
1522-
return
1627+
return False
15231628
selector = None
15241629
if await self.is_element_present('[class="cf-turnstile"]'):
15251630
selector = '[class="cf-turnstile"]'
@@ -1568,9 +1673,9 @@ async def solve_captcha(self):
15681673
):
15691674
selector = "div:not([class]) > div:not([class])"
15701675
else:
1571-
return
1676+
return False
15721677
if not selector:
1573-
return
1678+
return False
15741679
if (
15751680
await self.is_element_present("form")
15761681
and (
@@ -1670,6 +1775,8 @@ async def solve_captcha(self):
16701775
selector, x_offset, y_offset, timeout=1
16711776
)
16721777
await self.sleep(0.22)
1778+
return True
1779+
return False
16731780

16741781
async def click_captcha(self):
16751782
await self.solve_captcha()

0 commit comments

Comments
 (0)