Skip to content

Commit

Permalink
[major] Fix click error in full mode; fix skip option error
Browse files Browse the repository at this point in the history
  • Loading branch information
Yoongi Kim committed Feb 20, 2019
1 parent f7220f5 commit d9bdef6
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 10 deletions.
17 changes: 13 additions & 4 deletions collect_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,18 @@ def get_scroll(self):
return pos

def wait_and_click(self, xpath):
w = WebDriverWait(self.browser, 10)
elem = w.until(EC.element_to_be_clickable((By.XPATH, xpath)))
webdriver.ActionChains(self.browser).move_to_element(elem).click(elem).perform()
# Clicking sometimes fails for no apparent reason, so on failure refresh the browser and retry until the click succeeds.
try:
w = WebDriverWait(self.browser, 15)
elem = w.until(EC.element_to_be_clickable((By.XPATH, xpath)))
elem.click()
except Exception as e:
print('Click time out - {}'.format(xpath))
print('Refreshing browser...')
self.browser.refresh()
time.sleep(2)
return self.wait_and_click(xpath)

return elem

def google(self, keyword, add_url=""):
Expand Down Expand Up @@ -155,7 +164,7 @@ def google_full(self, keyword, add_url=""):

print('Scraping links')

self.wait_and_click('//img[@class="rg_ic rg_i"]')
self.wait_and_click('//div[@data-ri="0"]')
time.sleep(1)

links = []
Expand Down
11 changes: 5 additions & 6 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,12 +159,6 @@ def download_from_site(self, keyword, site_code):
collect = CollectLinks() # initialize chrome driver

try:
dirname = '{}/{}'.format(self.download_path, keyword)

if os.path.exists(os.path.join(os.getcwd(), dirname)) and self.skip:
print('Skipping already existing directory {}'.format(dirname))
return

print('Collecting links... {} from {}'.format(keyword, site_name))

if site_code == Sites.GOOGLE:
Expand Down Expand Up @@ -200,6 +194,11 @@ def do_crawling(self):
tasks = []

for keyword in keywords:
dir_name = '{}/{}'.format(self.download_path, keyword)
if os.path.exists(os.path.join(os.getcwd(), dir_name)) and self.skip:
print('Skipping already existing directory {}'.format(dir_name))
continue

if self.do_google:
if self.full_resolution:
tasks.append([keyword, Sites.GOOGLE_FULL])
Expand Down

0 comments on commit d9bdef6

Please sign in to comment.