Skip to content

Commit 2e557f6

Browse files
authored
Feature: minimal addition to run remote webdriver (#55)
* Add remote Selenium option
* Add info about remote driver to readme
* Import webdriver properly
1 parent 3adfa0f commit 2e557f6

File tree

2 files changed

+33
-9
lines changed

2 files changed

+33
-9
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ Optionally, set the path to the browser executable:
2525
SELENIUM_BROWSER_EXECUTABLE_PATH = which('firefox')
2626
```
2727

28+
To use a remote Selenium driver, set `SELENIUM_COMMAND_EXECUTOR` to the URL of the remote Selenium server instead of setting `SELENIUM_DRIVER_EXECUTABLE_PATH`:
29+
```python
30+
SELENIUM_COMMAND_EXECUTOR = 'http://localhost:4444/wd/hub'
31+
```
32+
2833
2. Add the `SeleniumMiddleware` to the downloader middlewares:
2934
```python
3035
DOWNLOADER_MIDDLEWARES = {

scrapy_selenium/middlewares.py

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
class SeleniumMiddleware:
1414
"""Scrapy middleware handling the requests using selenium"""
1515

16-
def __init__(self, driver_name, driver_executable_path, driver_arguments,
17-
browser_executable_path):
16+
def __init__(self, driver_name, driver_executable_path,
17+
browser_executable_path, command_executor, driver_arguments):
1818
"""Initialize the selenium webdriver
1919
2020
Parameters
@@ -27,6 +27,8 @@ def __init__(self, driver_name, driver_executable_path, driver_arguments,
2727
A list of arguments to initialize the driver
2828
browser_executable_path: str
2929
The path of the executable binary of the browser
30+
command_executor: str
31+
Selenium remote server endpoint
3032
"""
3133

3234
webdriver_base_path = f'selenium.webdriver.{driver_name}'
@@ -38,6 +40,7 @@ def __init__(self, driver_name, driver_executable_path, driver_arguments,
3840
driver_options_klass = getattr(driver_options_module, 'Options')
3941

4042
driver_options = driver_options_klass()
43+
4144
if browser_executable_path:
4245
driver_options.binary_location = browser_executable_path
4346
for argument in driver_arguments:
@@ -48,7 +51,19 @@ def __init__(self, driver_name, driver_executable_path, driver_arguments,
4851
f'{driver_name}_options': driver_options
4952
}
5053

51-
self.driver = driver_klass(**driver_kwargs)
54+
# locally installed driver
55+
if driver_executable_path is not None:
56+
driver_kwargs = {
57+
'executable_path': driver_executable_path,
58+
f'{driver_name}_options': driver_options
59+
}
60+
self.driver = driver_klass(**driver_kwargs)
61+
# remote driver
62+
elif command_executor is not None:
63+
from selenium import webdriver
64+
capabilities = driver_options.to_capabilities()
65+
self.driver = webdriver.Remote(command_executor=command_executor,
66+
desired_capabilities=capabilities)
5267

5368
@classmethod
5469
def from_crawler(cls, crawler):
@@ -57,18 +72,22 @@ def from_crawler(cls, crawler):
5772
driver_name = crawler.settings.get('SELENIUM_DRIVER_NAME')
5873
driver_executable_path = crawler.settings.get('SELENIUM_DRIVER_EXECUTABLE_PATH')
5974
browser_executable_path = crawler.settings.get('SELENIUM_BROWSER_EXECUTABLE_PATH')
75+
command_executor = crawler.settings.get('SELENIUM_COMMAND_EXECUTOR')
6076
driver_arguments = crawler.settings.get('SELENIUM_DRIVER_ARGUMENTS')
6177

62-
if not driver_name or not driver_executable_path:
63-
raise NotConfigured(
64-
'SELENIUM_DRIVER_NAME and SELENIUM_DRIVER_EXECUTABLE_PATH must be set'
65-
)
78+
if driver_name is None:
79+
raise NotConfigured('SELENIUM_DRIVER_NAME must be set')
80+
81+
if driver_executable_path is None and command_executor is None:
82+
raise NotConfigured('Either SELENIUM_DRIVER_EXECUTABLE_PATH '
83+
'or SELENIUM_COMMAND_EXECUTOR must be set')
6684

6785
middleware = cls(
6886
driver_name=driver_name,
6987
driver_executable_path=driver_executable_path,
70-
driver_arguments=driver_arguments,
71-
browser_executable_path=browser_executable_path
88+
browser_executable_path=browser_executable_path,
89+
command_executor=command_executor,
90+
driver_arguments=driver_arguments
7291
)
7392

7493
crawler.signals.connect(middleware.spider_closed, signals.spider_closed)

0 commit comments

Comments
 (0)