Skip to content

Commit

Permalink
google full error fix
Browse files (browse the repository at this point in the history)
  • Loading branch information
YoongiKim committed May 23, 2023
1 parent 2518344 commit 254a2e3
Showing 1 changed file with 15 additions and 33 deletions.
48 changes: 15 additions & 33 deletions collect_links.py
Original file line number | Diff line number | Diff line change
[Expand Up] @@ -212,42 +212,25 @@ def google_full(self, keyword, add_url="", limit=100):
time.sleep(1)

links = []
limit = 10000 if limit == 0 else limit
count = 1

last_scroll = 0
scroll_patience = 0

NUM_MAX_RETRY = 30
NUM_MAX_SCROLL_PATIENCE = 100
for _ in range(limit):

while len(links) < limit:
try:
xpath = '//div[@id="islsp"]//div[@class="v4dQwb"]'
div_box = self.browser.find_element(By.XPATH, xpath)
self.highlight(div_box)

for _ in range(NUM_MAX_RETRY):
try:
xpath = '//img[@class="n3VNCb pT0Scc KAlRDb"]'
img = div_box.find_element(By.XPATH, xpath)
self.highlight(img)
break
except:
time.sleep(0.1)
pass

xpath = '//div[@class="k7O2sd"]'
loading_bar = div_box.find_element(By.XPATH, xpath)

# Wait for image to load. If not it will display base64 code.
while str(loading_bar.get_attribute('style')) != 'display: none;':
time.sleep(0.1)

src = img.get_attribute('src')

if src is not None:
links.append(src)
print('%d: %s' % (count, src))
count += 1
xpath = '//div[@class="n4hgof"]//img[@class="r48jcc pT0Scc iPVvYb"]'
imgs = elem.find_elements(By.XPATH, xpath)

for img in imgs:
self.highlight(img)
src = img.get_attribute('src')

if src is not None and src not in links:
links.append(src)
print('%d: %s' % (count, src))
count += 1

except KeyboardInterrupt:
break
[Expand All] @@ -266,8 +249,7 @@ def google_full(self, keyword, add_url="", limit=100):
last_scroll = scroll

if scroll_patience >= NUM_MAX_SCROLL_PATIENCE:
elem.send_keys(Keys.RIGHT)
continue
break

elem.send_keys(Keys.RIGHT)

Expand Down

0 comments on commit 254a2e3

Please sign in to comment.