Skip to content

Commit 17e3389

Browse files
Merge pull request #74 from kayqueGovetri/ENH/undetectable-chrome
ENH: Implement undetectable chrome
2 parents f44d705 + b096d81 commit 17e3389

File tree

7 files changed

+218
-22
lines changed

7 files changed

+218
-22
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ jobs:
2424
matrix:
2525
os: [ubuntu-latest, windows-latest, macos-latest]
2626
python-version: ["3.10"]
27-
browser: ["firefox", "chrome", "edge"]
27+
browser: ["firefox", "chrome", "edge", "undetected_chrome"]
2828
headless: [true]
2929
exclude:
3030
# Can't install firefox using setup-firefox on Windows

botcity/web/bot.py

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -259,16 +259,41 @@ def check_driver():
259259
self.capabilities = cap
260260
driver_path = self.driver_path or check_driver()
261261
self.driver_path = driver_path
262-
if compat.version_selenium_is_larger_than_four():
263-
service = BROWSER_CONFIGS.get(self.browser).get("service")
264-
service = service(executable_path=self.driver_path)
265-
service.desired_capabilities = cap
266-
267-
self._driver = driver_class(options=opt, service=service)
268-
else:
269-
self._driver = driver_class(options=opt, desired_capabilities=cap, executable_path=driver_path)
262+
self._driver = driver_class(**self._get_parameters_to_driver())
263+
self._others_configurations()
270264
self.set_screen_resolution()
271265

266+
def _get_parameters_to_driver(self):
    """Build the keyword arguments passed to the WebDriver class constructor.

    The set of keywords depends on the selected browser and on the result of
    ``compat.version_selenium_is_larger_than_four()``.

    Returns:
        dict: Keyword arguments to be unpacked into the driver class.
    """
    # Undetected Chrome takes the driver path under its own keyword.
    if self.browser == Browser.UNDETECTED_CHROME:
        return dict(
            driver_executable_path=self.driver_path,
            options=self.options,
            desired_capabilities=self.capabilities,
        )

    # Older Selenium: capabilities and executable path go straight to the driver.
    if not compat.version_selenium_is_larger_than_four():
        return dict(
            options=self.options,
            desired_capabilities=self.capabilities,
            executable_path=self.driver_path,
        )

    # Newer Selenium: the executable path and capabilities travel in a service.
    return dict(options=self.options, service=self._get_service())
275+
276+
def _get_service(self):
    """Create the driver service object for the currently selected browser.

    The service class is looked up in ``BROWSER_CONFIGS`` under the
    ``"service"`` key of the entry for ``self.browser``.

    Returns:
        The service instance, created with ``self.driver_path`` as its
        executable and with ``self.capabilities`` attached as
        ``desired_capabilities``.
    """
    service_class = BROWSER_CONFIGS.get(self.browser).get("service")
    driver_service = service_class(executable_path=self.driver_path)
    driver_service.desired_capabilities = self.capabilities
    return driver_service
281+
282+
def _others_configurations(self):
    """Apply browser-specific settings that need an already created driver.

    Only ``Browser.UNDETECTED_CHROME`` needs extra work; for any other
    browser this method does nothing.
    """
    if self.browser != Browser.UNDETECTED_CHROME:
        return

    # There is a problem in undetected chrome that prevents downloading files
    # even when download_folder_path is passed in the preferences.
    # This workaround is taken from the following issue:
    # https://github.com/ultrafunkamsterdam/undetected-chromedriver/issues/260#issuecomment-901276808
    # It is meant to be a temporary solution.
    download_behavior = {
        "behavior": "allow",
        "downloadPath": self.download_folder_path,
    }
    self.driver.execute_cdp_cmd("Page.setDownloadBehavior", download_behavior)
296+
272297
def stop_browser(self):
273298
"""
274299
Stops the Chrome browser and clean up the User Data Directory.
@@ -1128,7 +1153,7 @@ def wait_for_downloads(self, timeout: int = 120000):
11281153
Args:
11291154
timeout (int, optional): Timeout in millis. Defaults to 120000.
11301155
"""
1131-
if self.browser in [Browser.CHROME, Browser.EDGE] and self.headless:
1156+
if self.browser in [Browser.CHROME, Browser.UNDETECTED_CHROME, Browser.EDGE] and self.headless:
11321157
start_time = time.time()
11331158
while True:
11341159
elapsed_time = (time.time() - start_time) * 1000

botcity/web/browsers/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from . import firefox
55
from . import edge
66
from . import ie
7+
from . import undetected_chrome
78

89

910
class Browser(str, enum.Enum):
@@ -20,6 +21,7 @@ class Browser(str, enum.Enum):
2021
FIREFOX = "firefox"
2122
EDGE = "edge"
2223
IE = "ie"
24+
UNDETECTED_CHROME = 'undetected_chrome'
2325

2426

2527
class PageLoadStrategy(str, enum.Enum):
@@ -55,6 +57,14 @@ class PageLoadStrategy(str, enum.Enum):
5557
"wait_for_downloads": firefox.wait_for_downloads,
5658
"service": firefox.FirefoxService
5759
},
60+
Browser.UNDETECTED_CHROME: {
61+
"driver": "chromedriver",
62+
"class": undetected_chrome.Chrome, # noqa: F401, F403
63+
"options": undetected_chrome.default_options,
64+
"capabilities": undetected_chrome.default_capabilities,
65+
"wait_for_downloads": undetected_chrome.wait_for_downloads,
66+
"service": undetected_chrome.ChromeService
67+
},
5868
Browser.EDGE: {
5969
"driver": "msedgedriver",
6070
"class": edge.Edge,
botcity/web/browsers/undetected_chrome.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
import atexit
2+
import json
3+
import os
4+
import tempfile
5+
from typing import Dict
6+
7+
from undetected_chromedriver import Chrome # noqa: F401, F403
8+
from undetected_chromedriver.options import ChromeOptions
9+
from undetected_chromedriver import Service as ChromeService # noqa: F401, F403
10+
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
11+
from ..util import cleanup_temp_dir
12+
13+
14+
def default_options(headless=False, download_folder_path=None, user_data_dir=None,
                    page_load_strategy="normal") -> ChromeOptions:
    """Retrieve the default options for this browser curated by BotCity.

    Args:
        headless (bool, optional): Whether or not to use the headless mode. Defaults to False.
        download_folder_path (str, optional): The default path in which to save files.
            If None, the current directory is used. Defaults to None.
        user_data_dir (str, optional): The directory to use as user profile.
            If None, a new temporary directory is used. Defaults to None.
        page_load_strategy (str, optional): The page load strategy, either as a plain
            string or as an object exposing the string in ``.value``. Defaults to "normal".

    Returns:
        ChromeOptions: The Chrome options.
    """
    chrome_options = ChromeOptions()

    # Accept both enum-like members (anything with ``.value``) and plain strings.
    chrome_options.page_load_strategy = getattr(page_load_strategy, "value", page_load_strategy)

    # "--disable-sync" and "--disable-translate" are two separate switches; they
    # used to be fused into a single unrecognised "--disable-syncdisable-translate".
    base_arguments = (
        "--remote-debugging-port=0",
        "--no-first-run",
        "--no-default-browser-check",
        "--disable-background-networking",
        "--disable-background-timer-throttling",
        "--disable-client-side-phishing-detection",
        "--disable-default-apps",
        "--disable-hang-monitor",
        "--disable-popup-blocking",
        "--disable-prompt-on-repost",
        "--disable-sync",
        "--disable-translate",
        "--metrics-recording-only",
        "--safebrowsing-disable-auto-update",
        # Disable What's New banner for new chrome installs
        "--disable-features=ChromeWhatsNewUI",
        "--disable-blink-features=AutomationControlled",
    )
    for argument in base_arguments:
        chrome_options.add_argument(argument)

    # NOTE(review): the "excludeSwitches" / "useAutomationExtension" experimental
    # options (used to hide the "remote-controlled" banner) were left commented
    # out in the original code; presumably they are not wanted with
    # undetected_chromedriver - confirm before enabling them here.

    if headless:
        for argument in ("--headless", "--disable-gpu", "--hide-scrollbars", "--mute-audio"):
            chrome_options.add_argument(argument)

    # Check if user is root
    try:
        # os.geteuid is only available on Unix
        if os.geteuid() == 0:
            chrome_options.add_argument("--no-sandbox")
    except AttributeError:
        pass

    if not user_data_dir:
        # Keep the temporary profile alive for the process lifetime and
        # remove it at interpreter exit.
        temp_dir = tempfile.TemporaryDirectory(prefix="botcity_")
        user_data_dir = temp_dir.name
        atexit.register(cleanup_temp_dir, temp_dir)

    chrome_options.add_argument(f"--user-data-dir={user_data_dir}")

    if not download_folder_path:
        download_folder_path = os.getcwd()

    # Print preview state that preselects "Save as PDF" as the destination.
    app_state = {
        'recentDestinations': [{
            'id': 'Save as PDF',
            'origin': 'local',
            'account': ''
        }],
        'selectedDestinationId': 'Save as PDF',
        'version': 2
    }

    # Set the Downloads default folder
    prefs = {
        "printing.print_preview_sticky_settings.appState": json.dumps(app_state),
        "download.default_directory": download_folder_path,
        "savefile.default_directory": download_folder_path,
        "printing.default_destination_selection_rules": {
            "kind": "local",
            "namePattern": "Save as PDF",
        },
        "safebrowsing.enabled": True,
        "credentials_enable_service": False,
        "profile.password_manager_enabled": False,
        "plugins.always_open_pdf_externally": True
    }

    chrome_options.add_experimental_option("prefs", prefs)
    chrome_options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
    )

    chrome_options.add_argument("--kiosk-printing")

    return chrome_options
116+
117+
118+
def default_capabilities() -> Dict:
    """Return the default capabilities for this browser.

    Returns:
        Dict: A new dictionary copied from ``DesiredCapabilities.CHROME``, so
            callers can modify it without touching the shared defaults.
    """
    return dict(DesiredCapabilities.CHROME)
125+
126+
127+
def wait_for_downloads(driver):
    """Check the browser's downloads page for finished downloads.

    *Important*: This function overwrites the current page with the downloads page.

    Args:
        driver: The WebDriver instance; it is navigated to ``chrome://downloads/``
            unless its current URL already starts with ``chrome://downloads``.

    Returns:
        Whatever the injected script returns: the list of file URLs when every
        listed download is in the "COMPLETE" state; otherwise the script
        returns nothing.
    """
    already_on_downloads_page = driver.current_url.startswith("chrome://downloads")
    if not already_on_downloads_page:
        driver.get("chrome://downloads/")

    # The download entries live inside the shadow DOM of <downloads-manager>.
    completed_downloads_script = """
        var items = document.querySelector('downloads-manager')
            .shadowRoot.getElementById('downloadsList').items;
        if (items.every(e => e.state === "COMPLETE"))
            return items.map(e => e.fileUrl || e.file_url);
        """
    return driver.execute_script(completed_downloads_script)

conftest.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import json
33
import shutil
44
import tempfile
5+
import time
56

67
import pytest
78
import typing
@@ -18,6 +19,12 @@
1819
TEST_PAGE = "https://lf2a.github.io/webpage-test/test.html"
1920
INDEX_PAGE = "https://lf2a.github.io/webpage-test/"
2021

22+
platforms = {
23+
"Linux": "linux",
24+
"Darwin": "mac",
25+
"Windows": "windows"
26+
}
27+
2128

2229
def get_fake_bin_path(web: WebBot) -> str:
2330
return os.path.join(web.download_folder_path, 'fake.bin')
@@ -32,6 +39,15 @@ def setup_chrome(headless: bool, tmp_folder: str, download_driver: str) -> WebBo
3239
return web
3340

3441

42+
def setup_undetected_chrome(headless: bool, tmp_folder: str, download_driver: str) -> WebBot:
    """Create a WebBot configured to run with the undetected Chrome browser.

    Args:
        headless (bool): Value passed to the WebBot constructor.
        tmp_folder (str): Folder assigned as the bot's download folder.
        download_driver (str): Path assigned as the bot's driver path.

    Returns:
        WebBot: The configured bot.
    """
    bot = WebBot(headless)
    bot.browser = Browser.UNDETECTED_CHROME

    bot.driver_path = download_driver
    bot.download_folder_path = tmp_folder
    return bot
49+
50+
3551
def setup_firefox(headless: bool, tmp_folder: str, download_driver: str) -> WebBot:
3652
web = WebBot(headless)
3753
web.browser = Browser.FIREFOX
@@ -49,11 +65,6 @@ def setup_edge(headless: bool, tmp_folder: str, download_driver: str) -> WebBot:
4965
web.driver_path = download_driver
5066
web.download_folder_path = tmp_folder
5167
opt = browsers.edge.default_options(headless=headless, download_folder_path=tmp_folder)
52-
platforms = {
53-
"Linux": "linux",
54-
"Darwin": "mac",
55-
"Windows": "windows"
56-
}
5768
platform_name = platforms.get(platform.system())
5869

5970
opt.platform_name = platform_name
@@ -66,7 +77,8 @@ def factory_setup_browser(browser: str, is_headless: bool, tmp_folder: str, down
6677
dict_browsers = {
6778
'chrome': setup_chrome,
6879
'firefox': setup_firefox,
69-
'edge': setup_edge
80+
'edge': setup_edge,
81+
'undetected_chrome': setup_undetected_chrome
7082
}
7183

7284
setup_browser = dict_browsers.get(browser, None)
@@ -81,7 +93,8 @@ def factory_driver_manager(browser: str):
8193
dict_driver_manager = {
8294
'chrome': ChromeDriverManager,
8395
'firefox': GeckoDriverManager,
84-
'edge': EdgeChromiumDriverManager
96+
'edge': EdgeChromiumDriverManager,
97+
'undetected_chrome': ChromeDriverManager
8598
}
8699

87100
driver_manager = dict_driver_manager.get(browser, None)
@@ -106,6 +119,9 @@ def download_driver(request):
106119
manager = factory_driver_manager(browser=browser)
107120
installed_driver = manager(path=folder_driver).install()
108121
yield installed_driver
122+
# Issue: https://github.com/ultrafunkamsterdam/undetected-chromedriver/issues/551
123+
if platforms.get(platform.system()) == "windows" and browser == Browser.UNDETECTED_CHROME:
124+
time.sleep(3)
109125
shutil.rmtree(folder_driver)
110126

111127

@@ -128,4 +144,3 @@ def get_event_result(id_event: str, web: WebBot) -> typing.Dict:
128144
def pytest_addoption(parser):
129145
parser.addoption('--headless', action='store', default="true")
130146
parser.addoption('--browser', action='store', default='chrome')
131-

requirements.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,6 @@ beautifulsoup4
33
numpy
44
opencv-python
55
pillow
6-
selenium==4.6.1
6+
selenium>=4.6.1,<5
7+
undetected-chromedriver
8+
packaging

tests/test_browser.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import os
2+
import platform
3+
24
import pytest
35
import conftest
46

57
from PIL import Image
6-
from botcity.web import WebBot, By
8+
from botcity.web import WebBot, By, Browser
79

810

911
def test_context(web: WebBot):
@@ -201,8 +203,12 @@ def test_leave_iframe(web: WebBot):
201203
def test_get_view_port_size(web: WebBot):
202204
web.browse(conftest.INDEX_PAGE)
203205
size = web.get_viewport_size()
204-
205-
element = web.find_element('window-size', By.ID).text.split('x')
206+
if web.browser == Browser.UNDETECTED_CHROME and conftest.platforms.get(platform.system()) == 'mac':
207+
width = web.execute_javascript("return window.innerWidth")
208+
height = web.execute_javascript("return window.innerHeight")
209+
element = [width, height]
210+
else:
211+
element = web.find_element('window-size', By.ID).text.split('x')
206212
assert size == tuple(int(e) for e in element)
207213

208214

0 commit comments

Comments
 (0)