Skip to content

Commit

Permalink
wait and click
Browse files Browse the repository at this point in the history
  • Loading branch information
Yoongi Kim committed Feb 19, 2019
1 parent 6a1d3fe commit ebbfbe2
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 14 deletions.
26 changes: 14 additions & 12 deletions collect_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
from selenium.webdriver.common.by import By
from selenium.common.exceptions import ElementNotVisibleException
import platform
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


class CollectLinks:
def __init__(self):
Expand All @@ -44,6 +47,11 @@ def get_scroll(self):
pos = self.browser.execute_script("return window.pageYOffset;")
return pos

def wait_and_click(self, xpath):
    """Wait until the element located by *xpath* is clickable, then click it.

    Args:
        xpath: XPath expression identifying the element to click.

    Returns:
        The element that was clicked.

    Raises:
        selenium.common.exceptions.TimeoutException: if no clickable
            element matching *xpath* appears within 10 seconds.
    """
    # Poll the page for up to 10 seconds instead of failing immediately
    # when the element has not been rendered yet.
    wait = WebDriverWait(self.browser, 10)
    elem = wait.until(EC.element_to_be_clickable((By.XPATH, xpath)))
    # click() returns None, so it must not be chained onto the assignment;
    # otherwise the method would always hand back None.
    elem.click()
    return elem

def google(self, keyword, add_url=""):
self.browser.get("https://www.google.com/search?q={}&source=lnms&tbm=isch{}".format(keyword, add_url))

Expand All @@ -59,8 +67,7 @@ def google(self, keyword, add_url=""):

try:
# btn_more = self.browser.find_element(By.XPATH, '//input[@value="결과 더보기"]')
btn_more = self.browser.find_element(By.XPATH, '//input[@id="smb"]')
btn_more.click()
self.wait_and_click('//input[@id="smb"]')

for i in range(60):
elem.send_keys(Keys.PAGE_DOWN)
Expand Down Expand Up @@ -106,8 +113,7 @@ def naver(self, keyword, add_url=""):
time.sleep(0.2)

try:
btn_more = self.browser.find_element(By.XPATH, '//a[@class="btn_more _more"]')
btn_more.click()
self.wait_and_click('//a[@class="btn_more _more"]')

for i in range(60):
elem.send_keys(Keys.PAGE_DOWN)
Expand Down Expand Up @@ -142,16 +148,14 @@ def google_full(self, keyword, add_url=""):
print('[Full Resolution Mode]')

self.browser.get("https://www.google.co.kr/search?q={}&tbm=isch{}".format(keyword, add_url))

time.sleep(2)

elem = self.browser.find_element_by_tag_name("body")
time.sleep(1)

print('Scraping links')

boxes = self.browser.find_elements(By.XPATH, '//div[@class="rg_bx rg_di rg_el ivg-i"]')

boxes[0].click()
self.wait_and_click('//div[@class="rg_bx rg_di rg_el ivg-i"]')
time.sleep(1)

links = []
Expand Down Expand Up @@ -196,16 +200,14 @@ def naver_full(self, keyword, add_url=""):
print('[Full Resolution Mode]')

self.browser.get("https://search.naver.com/search.naver?where=image&sm=tab_jum&query={}{}".format(keyword, add_url))

time.sleep(2)

elem = self.browser.find_element_by_tag_name("body")
time.sleep(1)

print('Scraping links')

boxes = self.browser.find_elements(By.XPATH, '//div[@class="img_area _item"]')

boxes[0].click()
self.wait_and_click('//div[@class="img_area _item"]')
time.sleep(1)

links = []
Expand Down
3 changes: 1 addition & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,7 @@ def get_keywords(keywords_file='keywords.txt'):
with open(keywords_file, 'r', encoding='utf-8-sig') as f:
text = f.read()
lines = text.split('\n')
if '' in lines:
lines.remove('')
lines = filter(lambda x: x != '' and x is not None, lines)
keywords = sorted(set(lines))

print('{} keywords found: {}'.format(len(keywords), keywords))
Expand Down

0 comments on commit ebbfbe2

Please sign in to comment.