Skip to content

Commit

Permalink
Headless mode as optional
Browse files Browse the repository at this point in the history
  • Loading branch information
Yoongi Kim committed Jun 25, 2020
1 parent 45c9911 commit 9bd8863
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 7 deletions.
Original file line number Diff line number Diff line change
@@ -1 +1 @@
http://chromedriver.chromium.org/downloads
http://chromedriver.chromium.org/downloads
5 changes: 3 additions & 2 deletions collect_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@


class CollectLinks:
def __init__(self):
def __init__(self, no_gui=False):
executable = ''

if platform.system() == 'Windows':
Expand All @@ -47,9 +47,10 @@ def __init__(self):
raise FileNotFoundError('Chromedriver file should be placed at {}'.format(executable))

chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
if no_gui:
chrome_options.add_argument('--headless')
self.browser = webdriver.Chrome(executable, chrome_options=chrome_options)

browser_version = 'Failed to detect version'
Expand Down
24 changes: 20 additions & 4 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def get_face_url(code):

class AutoCrawler:
def __init__(self, skip_already_exist=True, n_threads=4, do_google=True, do_naver=True, download_path='download',
full_resolution=False, face=False):
full_resolution=False, face=False, no_gui=False):
"""
:param skip_already_exist: Skips keyword already downloaded before. This is needed when re-downloading.
:param n_threads: Number of threads to download.
Expand All @@ -61,6 +61,7 @@ def __init__(self, skip_already_exist=True, n_threads=4, do_google=True, do_nave
:param download_path: Download folder path
:param full_resolution: Download full resolution image instead of thumbnails (slow)
:param face: Face search mode
:param no_gui: No GUI mode. Acceleration for full_resolution mode.
"""

self.skip = skip_already_exist
Expand All @@ -70,6 +71,7 @@ def __init__(self, skip_already_exist=True, n_threads=4, do_google=True, do_nave
self.download_path = download_path
self.full_resolution = full_resolution
self.face = face
self.no_gui = no_gui

os.makedirs('./{}'.format(self.download_path), exist_ok=True)

Expand Down Expand Up @@ -202,7 +204,7 @@ def download_from_site(self, keyword, site_code):
add_url = Sites.get_face_url(site_code) if self.face else ""

try:
collect = CollectLinks() # initialize chrome driver
collect = CollectLinks(no_gui=self.no_gui) # initialize chrome driver
except Exception as e:
print('Error occurred while initializing chromedriver - {}'.format(e))
return
Expand Down Expand Up @@ -323,6 +325,9 @@ def imbalance_check(self):
parser.add_argument('--naver', type=str, default='true', help='Download from naver.com (boolean)')
parser.add_argument('--full', type=str, default='false', help='Download full resolution image instead of thumbnails (slow)')
parser.add_argument('--face', type=str, default='false', help='Face search mode')
parser.add_argument('--no_gui', type=str, default='auto', help='No GUI mode. Acceleration for full_resolution mode. '
'But unstable on thumbnail mode. '
'Default: "auto" - false if full=false, true if full=true')
args = parser.parse_args()

_skip = False if str(args.skip).lower() == 'false' else True
Expand All @@ -332,7 +337,18 @@ def imbalance_check(self):
_full = False if str(args.full).lower() == 'false' else True
_face = False if str(args.face).lower() == 'false' else True

print('Options - skip:{}, threads:{}, google:{}, naver:{}, full_resolution:{}, face:{}'.format(_skip, _threads, _google, _naver, _full, _face))
no_gui_input = str(args.no_gui).lower()
if no_gui_input == 'auto':
_no_gui = _full
elif no_gui_input == 'true':
_no_gui = True
else:
_no_gui = False

crawler = AutoCrawler(skip_already_exist=_skip, n_threads=_threads, do_google=_google, do_naver=_naver, full_resolution=_full, face=_face)
print('Options - skip:{}, threads:{}, google:{}, naver:{}, full_resolution:{}, face:{}, no_gui:{}'
.format(_skip, _threads, _google, _naver, _full, _face, _no_gui))

crawler = AutoCrawler(skip_already_exist=_skip, n_threads=_threads,
do_google=_google, do_naver=_naver, full_resolution=_full,
face=_face, no_gui=_no_gui)
crawler.do_crawling()

0 comments on commit 9bd8863

Please sign in to comment.