2
2
from abc import ABC , abstractmethod
3
3
import time
4
4
from selenium import webdriver
5
+ from selenium .webdriver .chrome .options import Options as ChromeOptions
6
+ from selenium .webdriver .firefox .options import Options as FirefoxOptions
5
7
from entities import Book
6
8
from book_attribute_scraper import BookAttributeScraper
7
9
from book_review_scraper import AutomatedBookReviewScraper
@@ -21,7 +23,8 @@ def __init__(
21
23
automated_book_review_scraper : AutomatedBookReviewScraper ,
22
24
raw_data_storage : RawDataStorage ,
23
25
rds_data_storage : RDSDataStorage = None ,
24
- browser : str = 'chrome' ) -> None :
26
+ browser : str = 'chrome' ,
27
+ mode : str = 'normal' ) -> None :
25
28
"""
26
29
Args:
27
30
url (str): starting url for the book scraper
@@ -31,6 +34,7 @@ def __init__(
31
34
raw_data_storage (RawDataStorage): object for saving raw data
32
35
rds_data_storage (RDSDataStorage, optional): RDS interface object
33
36
browser (str, optional): browser to drive, 'chrome' or 'firefox'.
37
+ mode (str, optional): browser mode, 'normal' or 'headless'
34
38
"""
35
39
if not isinstance (book_attribute_scraper , BookAttributeScraper ):
36
40
raise TypeError ('Invalid type' )
@@ -49,14 +53,26 @@ def __init__(
49
53
# init Selenium
50
54
try :
51
55
if browser == 'chrome' :
56
+ chrome_options = ChromeOptions ()
57
+ if mode == 'headless' :
58
+ chrome_options .add_argument ("--headless" )
59
+ chrome_options .add_argument ('--no-sandbox' )
60
+ chrome_options .add_argument ('--disable-dev-shm-usage' )
61
+ self ._driver = webdriver .Chrome (options = chrome_options )
52
62
self ._driver .implicitly_wait (10 )
53
63
elif browser == 'firefox' :
64
+ firfox_options = FirefoxOptions ()
65
+ if mode == 'headless' :
66
+ firfox_options .add_argument ("--headless" )
67
+ firfox_options .add_argument ('--no-sandbox' )
68
+ firfox_options .add_argument ('--disable-dev-shm-usage' )
69
+ self ._driver = webdriver .Firefox (options = firfox_options )
54
70
self ._driver .implicitly_wait (10 )
55
71
else :
56
72
raise NotImplementedError (
57
73
'Only Chrome and Firefox are supported.' )
58
- except :
59
- print ('Selenium driver error' )
74
+ except Exception as e :
75
+ print ('Selenium driver error: ' , e )
60
76
61
77
# get to the url
62
78
try :
0 commit comments