13
13
class SeleniumMiddleware :
14
14
"""Scrapy middleware handling the requests using selenium"""
15
15
16
- def __init__ (self , driver_name , driver_executable_path , driver_arguments ,
17
- browser_executable_path ):
16
+ def __init__ (self , driver_name , driver_executable_path ,
17
+ browser_executable_path , command_executor , driver_arguments ):
18
18
"""Initialize the selenium webdriver
19
19
20
20
Parameters
@@ -27,6 +27,8 @@ def __init__(self, driver_name, driver_executable_path, driver_arguments,
27
27
A list of arguments to initialize the driver
28
28
browser_executable_path: str
29
29
The path of the executable binary of the browser
30
+ command_executor: str
31
+ Endpoint of the remote Selenium server to connect to; used only when no local driver executable path is given
30
32
"""
31
33
32
34
webdriver_base_path = f'selenium.webdriver.{ driver_name } '
@@ -38,6 +40,7 @@ def __init__(self, driver_name, driver_executable_path, driver_arguments,
38
40
driver_options_klass = getattr (driver_options_module , 'Options' )
39
41
40
42
driver_options = driver_options_klass ()
43
+
41
44
if browser_executable_path :
42
45
driver_options .binary_location = browser_executable_path
43
46
for argument in driver_arguments :
@@ -48,7 +51,19 @@ def __init__(self, driver_name, driver_executable_path, driver_arguments,
48
51
f'{ driver_name } _options' : driver_options
49
52
}
50
53
51
- self .driver = driver_klass (** driver_kwargs )
54
+ # a driver executable path is configured: start the locally installed driver
55
+ if driver_executable_path is not None :
56
+ driver_kwargs = {
57
+ 'executable_path' : driver_executable_path ,
58
+ f'{ driver_name } _options' : driver_options
59
+ }
60
+ self .driver = driver_klass (** driver_kwargs )
61
+ # no local driver path: connect to a remote Selenium server through command_executor
62
+ elif command_executor is not None :
63
+ from selenium import webdriver
64
+ capabilities = driver_options .to_capabilities ()
65
+ self .driver = webdriver .Remote (command_executor = command_executor ,
66
+ desired_capabilities = capabilities )
52
67
53
68
@classmethod
54
69
def from_crawler (cls , crawler ):
@@ -57,18 +72,22 @@ def from_crawler(cls, crawler):
57
72
driver_name = crawler .settings .get ('SELENIUM_DRIVER_NAME' )
58
73
driver_executable_path = crawler .settings .get ('SELENIUM_DRIVER_EXECUTABLE_PATH' )
59
74
browser_executable_path = crawler .settings .get ('SELENIUM_BROWSER_EXECUTABLE_PATH' )
75
+ command_executor = crawler .settings .get ('SELENIUM_COMMAND_EXECUTOR' )
60
76
driver_arguments = crawler .settings .get ('SELENIUM_DRIVER_ARGUMENTS' )
61
77
62
- if not driver_name or not driver_executable_path :
63
- raise NotConfigured (
64
- 'SELENIUM_DRIVER_NAME and SELENIUM_DRIVER_EXECUTABLE_PATH must be set'
65
- )
78
+ if driver_name is None :
79
+ raise NotConfigured ('SELENIUM_DRIVER_NAME must be set' )
80
+
81
+ if driver_executable_path is None and command_executor is None :
82
+ raise NotConfigured ('Either SELENIUM_DRIVER_EXECUTABLE_PATH '
83
+ 'or SELENIUM_COMMAND_EXECUTOR must be set' )
66
84
67
85
middleware = cls (
68
86
driver_name = driver_name ,
69
87
driver_executable_path = driver_executable_path ,
70
- driver_arguments = driver_arguments ,
71
- browser_executable_path = browser_executable_path
88
+ browser_executable_path = browser_executable_path ,
89
+ command_executor = command_executor ,
90
+ driver_arguments = driver_arguments
72
91
)
73
92
74
93
crawler .signals .connect (middleware .spider_closed , signals .spider_closed )
0 commit comments