Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: ["3.10"]
browser: ["firefox", "chrome", "edge"]
browser: ["firefox", "chrome", "edge", "undetected_chrome"]
headless: [true]
exclude:
# Can't install firefox using setup-firefox on Windows
Expand Down
12 changes: 8 additions & 4 deletions botcity/web/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
from .bot import WebBot, Browser, BROWSER_CONFIGS, By, PageLoadStrategy # noqa: F401, F403
from .parsers import table_to_dict, data_from_row, sanitize_header # noqa: F401, F403
from .util import element_as_select # noqa: F401, F403
# PATCH Selenium for compat
from .compat import patch_selenium # noqa: F401, F403, E402
patch_selenium() # noqa: F401, F403, E402

from botcity.web._version import get_versions
from .bot import WebBot, Browser, BROWSER_CONFIGS, By, PageLoadStrategy # noqa: F401, F403, E402
from .parsers import table_to_dict, data_from_row, sanitize_header # noqa: F401, F403, E402
from .util import element_as_select # noqa: F401, F403, E402

from botcity.web._version import get_versions # noqa: F401, F403, E402
__version__ = get_versions()['version']
del get_versions
44 changes: 37 additions & 7 deletions botcity/web/bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from selenium.webdriver.support.wait import WebDriverWait, TimeoutException, NoSuchElementException
from selenium.webdriver.support import expected_conditions as EC

from . import config, cv2find
from . import config, cv2find, compat
from .browsers import BROWSER_CONFIGS, Browser, PageLoadStrategy

try:
Expand Down Expand Up @@ -235,7 +235,6 @@ def start_browser(self):
"""
Starts the selected browser.
"""

def check_driver():
# Look for driver
driver_name = BROWSER_CONFIGS.get(self.browser).get("driver")
Expand All @@ -260,10 +259,41 @@ def check_driver():
self.capabilities = cap
driver_path = self.driver_path or check_driver()
self.driver_path = driver_path

self._driver = driver_class(options=opt, desired_capabilities=cap, executable_path=driver_path)
self._driver = driver_class(**self._get_parameters_to_driver())
self._others_configurations()
self.set_screen_resolution()

def _get_parameters_to_driver(self):
    """Assemble the keyword arguments used to instantiate the driver class.

    The shape of the returned mapping depends on the selected browser and on
    the result of ``compat.version_selenium_is_larger_than_four()``:

    * ``Browser.UNDETECTED_CHROME``: ``driver_executable_path``, ``options``
      and ``desired_capabilities``.
    * compat check truthy (presumably Selenium 4+ -- confirm in ``compat``):
      ``options`` and a ``service`` built by ``_get_service()``.
    * otherwise: ``options``, ``desired_capabilities`` and ``executable_path``.

    Returns:
        dict: Keyword arguments meant to be unpacked into the driver class.
    """
    if self.browser == Browser.UNDETECTED_CHROME:
        driver_kwargs = {
            "driver_executable_path": self.driver_path,
            "options": self.options,
            "desired_capabilities": self.capabilities,
        }
    elif compat.version_selenium_is_larger_than_four():
        driver_kwargs = {
            "options": self.options,
            "service": self._get_service(),
        }
    else:
        driver_kwargs = {
            "options": self.options,
            "desired_capabilities": self.capabilities,
            "executable_path": self.driver_path,
        }
    return driver_kwargs

def _get_service(self):
    """Create the driver service object for the currently selected browser.

    The service class is looked up under the ``"service"`` key of this
    browser's entry in ``BROWSER_CONFIGS`` and instantiated with the
    configured driver path. The current capabilities are then attached to
    the instance as its ``desired_capabilities`` attribute.

    Returns:
        The newly created service instance.
    """
    browser_config = BROWSER_CONFIGS.get(self.browser)
    service_class = browser_config.get("service")
    driver_service = service_class(executable_path=self.driver_path)
    driver_service.desired_capabilities = self.capabilities
    return driver_service

def _others_configurations(self):
    """Apply extra, browser-specific configuration to the running driver.

    Only ``Browser.UNDETECTED_CHROME`` needs additional setup; for every
    other browser this method does nothing.
    """
    if self.browser != Browser.UNDETECTED_CHROME:
        return

    # There is a problem in undetected chrome that prevents downloading files
    # even when download_folder_path is passed in the preferences.
    # Temporary workaround taken from:
    # https://github.com/ultrafunkamsterdam/undetected-chromedriver/issues/260#issuecomment-901276808
    download_behavior = {
        "behavior": "allow",
        "downloadPath": self.download_folder_path
    }
    self.driver.execute_cdp_cmd("Page.setDownloadBehavior", download_behavior)

def stop_browser(self):
"""
Stops the Chrome browser and clean up the User Data Directory.
Expand Down Expand Up @@ -1123,7 +1153,7 @@ def wait_for_downloads(self, timeout: int = 120000):
Args:
timeout (int, optional): Timeout in millis. Defaults to 120000.
"""
if self.browser in [Browser.CHROME, Browser.EDGE] and self.headless:
if self.browser in [Browser.CHROME, Browser.UNDETECTED_CHROME, Browser.EDGE] and self.headless:
start_time = time.time()
while True:
elapsed_time = (time.time() - start_time) * 1000
Expand Down Expand Up @@ -1369,14 +1399,14 @@ def mouse_move(self, x, y):
if self.browser == Browser.FIREFOX:
# Reset coordinates if the page has gone stale. Only required for Firefox
if self._html_elem is None:
self._html_elem = self._driver.find_element_by_tag_name('body')
self._html_elem = self._driver.find_element(By.TAG_NAME, 'body')
self._x = 0
self._y = 0
else:
try:
self._html_elem.is_enabled()
except StaleElementReferenceException:
self._html_elem = self._driver.find_element_by_tag_name('body')
self._html_elem = self._driver.find_element(By.TAG_NAME, 'body')
self._x = 0
self._y = 0

Expand Down
22 changes: 18 additions & 4 deletions botcity/web/browsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from . import firefox
from . import edge
from . import ie
from . import undetected_chrome


class Browser(str, enum.Enum):
Expand All @@ -20,6 +21,7 @@ class Browser(str, enum.Enum):
FIREFOX = "firefox"
EDGE = "edge"
IE = "ie"
UNDETECTED_CHROME = 'undetected_chrome'


class PageLoadStrategy(str, enum.Enum):
Expand All @@ -44,27 +46,39 @@ class PageLoadStrategy(str, enum.Enum):
"class": chrome.Chrome,
"options": chrome.default_options,
"capabilities": chrome.default_capabilities,
"wait_for_downloads": chrome.wait_for_downloads
"wait_for_downloads": chrome.wait_for_downloads,
"service": chrome.ChromeService,
},
Browser.FIREFOX: {
"driver": "geckodriver",
"class": firefox.Firefox,
"options": firefox.default_options,
"capabilities": firefox.default_capabilities,
"wait_for_downloads": firefox.wait_for_downloads
"wait_for_downloads": firefox.wait_for_downloads,
"service": firefox.FirefoxService
},
Browser.UNDETECTED_CHROME: {
"driver": "chromedriver",
"class": undetected_chrome.Chrome, # noqa: F401, F403
"options": undetected_chrome.default_options,
"capabilities": undetected_chrome.default_capabilities,
"wait_for_downloads": undetected_chrome.wait_for_downloads,
"service": undetected_chrome.ChromeService
},
Browser.EDGE: {
"driver": "msedgedriver",
"class": edge.Edge,
"options": edge.default_options,
"capabilities": edge.default_capabilities,
"wait_for_downloads": edge.wait_for_downloads
"wait_for_downloads": edge.wait_for_downloads,
"service": edge.EdgeService
},
Browser.IE: {
"driver": "IEDriverServer",
"class": ie.Ie,
"options": ie.default_options,
"capabilities": ie.default_capabilities,
"wait_for_downloads": ie.wait_for_downloads
"wait_for_downloads": ie.wait_for_downloads,
"service": ie.IeService
},
}
2 changes: 1 addition & 1 deletion botcity/web/browsers/chrome.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from selenium.webdriver import Chrome # noqa: F401, F403
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

from selenium.webdriver.chrome.service import Service as ChromeService # noqa: F401, F403
from ..util import cleanup_temp_dir


Expand Down
4 changes: 2 additions & 2 deletions botcity/web/browsers/edge.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
import time
from typing import Dict

from msedge.selenium_tools import Edge, EdgeOptions # noqa: F401, F403
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

from selenium.webdriver import Edge, EdgeOptions # noqa: F401, F403
from selenium.webdriver.edge.service import Service as EdgeService # noqa: F401, F403
from ..util import cleanup_temp_dir


Expand Down
24 changes: 10 additions & 14 deletions botcity/web/browsers/firefox.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
import tempfile
from typing import Dict

from selenium import webdriver
from selenium.webdriver import Firefox # noqa: F401, F403
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.firefox.service import Service as FirefoxService # noqa: F401, F403

from ..util import cleanup_temp_dir

Expand Down Expand Up @@ -362,23 +362,19 @@ def default_options(headless=False, download_folder_path=None, user_data_dir=Non
temp_dir = tempfile.TemporaryDirectory(prefix="botcity_")
user_data_dir = temp_dir.name
atexit.register(cleanup_temp_dir, temp_dir)
firefox_profile = webdriver.FirefoxProfile(user_data_dir)
firefox_profile.set_preference("security.default_personal_cert", "Select Automatically")
firefox_profile.set_preference('browser.download.folderList', 2)
firefox_profile.set_preference('browser.download.manager.showWhenStarting', False)
firefox_options.set_preference("profile", user_data_dir)
firefox_options.set_preference("security.default_personal_cert", "Select Automatically")
firefox_options.set_preference('browser.download.folderList', 2)
firefox_options.set_preference('browser.download.manager.showWhenStarting', False)
if not download_folder_path:
download_folder_path = os.getcwd()
firefox_profile.set_preference('browser.download.dir', download_folder_path)
firefox_profile.set_preference('general.warnOnAboutConfig', False)
firefox_options.set_preference('browser.download.dir', download_folder_path)
firefox_options.set_preference('general.warnOnAboutConfig', False)

mimetypes_to_download = ",".join(FIREFOX_MIMETYPES_TO_DOWNLOAD)
firefox_profile.set_preference("pdfjs.disabled", True)
firefox_profile.set_preference("plugin.disable_full_page_plugin_for_types", mimetypes_to_download)
firefox_profile.set_preference('browser.helperApps.neverAsk.saveToDisk', mimetypes_to_download)

firefox_profile.update_preferences()
firefox_options.profile = firefox_profile

firefox_options.set_preference("pdfjs.disabled", True)
firefox_options.set_preference("plugin.disable_full_page_plugin_for_types", mimetypes_to_download)
firefox_options.set_preference('browser.helperApps.neverAsk.saveToDisk', mimetypes_to_download)
return firefox_options


Expand Down
1 change: 1 addition & 0 deletions botcity/web/browsers/ie.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from selenium.webdriver import Ie # noqa: F401, F403
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.ie.options import Options
from selenium.webdriver.ie.service import Service as IeService # noqa: F401, F403


def default_options(headless=False, download_folder_path=None, user_data_dir=None,
Expand Down
138 changes: 138 additions & 0 deletions botcity/web/browsers/undetected_chrome.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import atexit
import json
import os
import tempfile
from typing import Dict

from undetected_chromedriver import Chrome # noqa: F401, F403
from undetected_chromedriver.options import ChromeOptions
from undetected_chromedriver import Service as ChromeService # noqa: F401, F403
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from ..util import cleanup_temp_dir


def default_options(headless=False, download_folder_path=None, user_data_dir=None,
                    page_load_strategy="normal") -> ChromeOptions:
    """Retrieve the default options for this browser curated by BotCity.

    Args:
        headless (bool, optional): Whether or not to use the headless mode. Defaults to False.
        download_folder_path (str, optional): The default path in which to save files.
            If None, the current directory is used. Defaults to None.
        user_data_dir (str, optional): The directory to use as user profile.
            If None, a new temporary directory is created and registered for
            cleanup at interpreter exit. Defaults to None.
        page_load_strategy (str, optional): The page load strategy, either as a plain
            string or as an object exposing the string through ``.value``.
            Defaults to "normal".

    Returns:
        ChromeOptions: The Chrome options.
    """
    chrome_options = ChromeOptions()

    # Accept both enum-like members (anything with ``.value``) and plain strings.
    chrome_options.page_load_strategy = getattr(page_load_strategy, "value", page_load_strategy)

    base_arguments = (
        "--remote-debugging-port=0",
        "--no-first-run",
        "--no-default-browser-check",
        "--disable-background-networking",
        "--disable-background-timer-throttling",
        "--disable-client-side-phishing-detection",
        "--disable-default-apps",
        "--disable-hang-monitor",
        "--disable-popup-blocking",
        "--disable-prompt-on-repost",
        # These two switches used to be fused into the single token
        # "--disable-syncdisable-translate", which is not a switch on its own,
        # so neither one took effect.
        "--disable-sync",
        "--disable-translate",
        "--metrics-recording-only",
        "--safebrowsing-disable-auto-update",
        # Disable What's New banner for new chrome installs
        "--disable-features=ChromeWhatsNewUI",
        "--disable-blink-features=AutomationControlled",
    )
    for argument in base_arguments:
        chrome_options.add_argument(argument)

    # Disable banner for Browser being remote-controlled
    # chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    # chrome_options.add_experimental_option('useAutomationExtension', False)

    if headless:
        for argument in ("--headless", "--disable-gpu", "--hide-scrollbars", "--mute-audio"):
            chrome_options.add_argument(argument)

    # Check if user is root. ``os.geteuid`` only exists on Unix, so look it up
    # defensively instead of assuming it is there.
    geteuid = getattr(os, "geteuid", None)
    if geteuid is not None and geteuid() == 0:
        chrome_options.add_argument("--no-sandbox")

    if not user_data_dir:
        # No profile supplied: use a throwaway one and clean it up at exit.
        temp_dir = tempfile.TemporaryDirectory(prefix="botcity_")
        user_data_dir = temp_dir.name
        atexit.register(cleanup_temp_dir, temp_dir)

    chrome_options.add_argument(f"--user-data-dir={user_data_dir}")

    if not download_folder_path:
        download_folder_path = os.getcwd()

    # Print-preview state that preselects "Save as PDF" as the destination.
    app_state = {
        'recentDestinations': [{
            'id': 'Save as PDF',
            'origin': 'local',
            'account': ''
        }],
        'selectedDestinationId': 'Save as PDF',
        'version': 2
    }

    # Set the Downloads default folder
    prefs = {
        "printing.print_preview_sticky_settings.appState": json.dumps(app_state),
        "download.default_directory": download_folder_path,
        "savefile.default_directory": download_folder_path,
        "printing.default_destination_selection_rules": {
            "kind": "local",
            "namePattern": "Save as PDF",
        },
        "safebrowsing.enabled": True,
        "credentials_enable_service": False,
        "profile.password_manager_enabled": False,
        "plugins.always_open_pdf_externally": True
    }

    chrome_options.add_experimental_option("prefs", prefs)
    chrome_options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
    )

    chrome_options.add_argument("--kiosk-printing")

    return chrome_options


def default_capabilities() -> Dict:
    """Return the default capabilities for this browser.

    Returns:
        Dict: A shallow copy of ``DesiredCapabilities.CHROME``, so adding or
        replacing top-level keys does not affect the shared template.
    """
    capabilities = DesiredCapabilities.CHROME.copy()
    return capabilities


def wait_for_downloads(driver):
    """Ask Chrome's downloads page for the list of finished downloads.

    *Important*: if the driver is not already on ``chrome://downloads``, this
    function navigates there, overwriting the current page.

    Args:
        driver: Object exposing ``current_url``, ``get`` and ``execute_script``.

    Returns:
        Whatever ``execute_script`` yields for the script below. The script
        returns the file URLs only when every listed item is in the
        "COMPLETE" state; otherwise it returns nothing.
    """
    already_on_downloads_page = driver.current_url.startswith("chrome://downloads")
    if not already_on_downloads_page:
        driver.get("chrome://downloads/")

    # The JavaScript text is kept verbatim; it runs inside the browser.
    completed_downloads_script = """
var items = document.querySelector('downloads-manager')
.shadowRoot.getElementById('downloadsList').items;
if (items.every(e => e.state === "COMPLETE"))
return items.map(e => e.fileUrl || e.file_url);
"""
    return driver.execute_script(completed_downloads_script)
Loading