[major] full mode click error fix, skip option error fix

yogeshlc · Feb 20, 2019 · d9bdef6
1 parent f7220f5
commit d9bdef6
Show file tree

Hide file tree

Showing 2 changed files with 18 additions and 10 deletions.
diff --git a/collect_links.py b/collect_links.py
@@ -48,9 +48,18 @@ def get_scroll(self):
         return pos
 
     def wait_and_click(self, xpath):
-        w = WebDriverWait(self.browser, 10)
-        elem = w.until(EC.element_to_be_clickable((By.XPATH, xpath)))
-        webdriver.ActionChains(self.browser).move_to_element(elem).click(elem).perform()
+        # Clicking sometimes fails for no apparent reason, so on any failure refresh the page and retry until the click succeeds.
+        try:
+            w = WebDriverWait(self.browser, 15)
+            elem = w.until(EC.element_to_be_clickable((By.XPATH, xpath)))
+            elem.click()
+        except Exception as e:
+            print('Click time out - {}'.format(xpath))
+            print('Refreshing browser...')
+            self.browser.refresh()
+            time.sleep(2)
+            return self.wait_and_click(xpath)
+
         return elem
 
     def google(self, keyword, add_url=""):
@@ -155,7 +164,7 @@ def google_full(self, keyword, add_url=""):
 
         print('Scraping links')
 
-        self.wait_and_click('//img[@class="rg_ic rg_i"]')
+        self.wait_and_click('//div[@data-ri="0"]')
         time.sleep(1)
 
         links = []

diff --git a/main.py b/main.py
@@ -159,12 +159,6 @@ def download_from_site(self, keyword, site_code):
         collect = CollectLinks()  # initialize chrome driver
 
         try:
-            dirname = '{}/{}'.format(self.download_path, keyword)
-
-            if os.path.exists(os.path.join(os.getcwd(), dirname)) and self.skip:
-                print('Skipping already existing directory {}'.format(dirname))
-                return
-
             print('Collecting links... {} from {}'.format(keyword, site_name))
 
             if site_code == Sites.GOOGLE:
@@ -200,6 +194,11 @@ def do_crawling(self):
         tasks = []
 
         for keyword in keywords:
+            dir_name = '{}/{}'.format(self.download_path, keyword)
+            if os.path.exists(os.path.join(os.getcwd(), dir_name)) and self.skip:
+                print('Skipping already existing directory {}'.format(dir_name))
+                continue
+
             if self.do_google:
                 if self.full_resolution:
                     tasks.append([keyword, Sites.GOOGLE_FULL])