Commit ab803d2: scraping

1 parent 4f2fcca commit ab803d2

File tree

5 files changed: +130, -0 lines changed


__scraping__/forexfactory.com/main.py

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
#!/usr/bin/env python3

# date: 2019.12.30
# https://stackoverflow.com/questions/59535798/python-webscraping-with-beautifulsoup-not-displaying-full-content/59536553#59536553

import requests
from bs4 import BeautifulSoup

r = requests.get("https://www.forexfactory.com/#detail=108867")
# the page uses JavaScript to redirect, so a browser may show different results

soup = BeautifulSoup(r.text, 'lxml')

table = soup.find("table", class_="calendar__table")

for row in table.find_all('tr', class_='calendar__row--grey'):

    currency = row.find("td", class_="currency")
    #print(currency.prettify())  # before getting the text
    currency = currency.get_text(strip=True)

    actual = row.find("td", class_="actual")
    actual = actual.get_text(strip=True)

    forecast = row.find("td", class_="forecast")
    forecast = forecast.get_text(strip=True)

    print(currency, actual, forecast)
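
The comment above notes that the page relies on JavaScript, so plain requests may not see what a browser shows. A minimal fallback sketch, assuming geckodriver is installed and the same table selectors apply, that lets a real browser render the page before parsing it (the same Selenium setup used elsewhere in this commit):

from selenium import webdriver
from bs4 import BeautifulSoup

driver = webdriver.Firefox()
driver.get("https://www.forexfactory.com/#detail=108867")

# parse the JavaScript-rendered HTML instead of the raw requests response
soup = BeautifulSoup(driver.page_source, 'lxml')

table = soup.find("table", class_="calendar__table")
for row in table.find_all('tr', class_='calendar__row--grey'):
    print(row.find("td", class_="currency").get_text(strip=True))

driver.quit()
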
Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
#!/usr/bin/env python3

# date: 2020.01.01
# https://stackoverflow.com/questions/59551193/i-want-to-download-images-from-python-what-should-i-do/

from selenium import webdriver
import requests

#path = r"C:\Users\qpslt\Desktop\py\chromedriver_win32\chromedriver.exe"
#driver = webdriver.Chrome(path)
driver = webdriver.Firefox()

url = "https://gall.dcinside.com/board/view/?id=baseball_new8&no=10131338&exception_mode=recommend&page=1"
driver.get(url)

images = driver.find_elements_by_xpath('//div[@class="writing_view_box"]//img')

for i, img in enumerate(images, 1):
    img_url = img.get_attribute('src')
    print(i, img_url)

    r = requests.get(img_url, headers={'Referer': url})

    with open("c:/test/{}.jpg".format(i), 'wb') as f:
        f.write(r.content)
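
A small variation sketch on the download step: derive the filename from the image URL and create the output folder instead of hard-coding c:/test/. The helper, its folder name, and the fallback filename are assumptions for illustration, not part of the original script:

import os
import requests

def save_image(img_url, referer, out_dir='images'):
    # hypothetical helper: out_dir and the fallback name are arbitrary choices
    os.makedirs(out_dir, exist_ok=True)
    name = os.path.basename(img_url.split('?')[0]) or 'image.jpg'
    r = requests.get(img_url, headers={'Referer': referer})  # Referer is still required
    with open(os.path.join(out_dir, name), 'wb') as f:
        f.write(r.content)

Inside the loop above this would be called as save_image(img.get_attribute('src'), url).
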
Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
#!/usr/bin/env python3

# date: 2020.01.02
# ???

import urllib.request
import bs4 as bs

sauce = urllib.request.urlopen('https://msih.bgu.ac.il/md-program/residency-placements/').read()
soup = bs.BeautifulSoup(sauce, 'lxml')

headers = soup.find_all('div', class_={'accord-head'})
grad_yr_list = []
for header in headers:
    grad_yr_list.append(header.h2.text[-4:])

rez_classes = soup.find_all('div', class_={'accord-con'})

data_dict = dict(zip(grad_yr_list, rez_classes))

for key, value in data_dict.items():
    print(type(value), key, value.find('h4').text)
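
The zip above assumes the accord-head and accord-con lists line up one to one. A sketch of an alternative pairing, assuming each accord-con div directly follows its accord-head div in the markup, using find_next_sibling instead:

import urllib.request
import bs4 as bs

sauce = urllib.request.urlopen('https://msih.bgu.ac.il/md-program/residency-placements/').read()
soup = bs.BeautifulSoup(sauce, 'lxml')

for header in soup.find_all('div', class_='accord-head'):
    grad_yr = header.h2.text[-4:]  # last four characters hold the graduation year
    content = header.find_next_sibling('div', class_='accord-con')
    if content and content.find('h4'):
        print(grad_yr, content.find('h4').text)
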

__scraping__/seedly.sg/main.py

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
#!/usr/bin/env python3

# date: 2020.01.02
# ???

# count stars

from selenium import webdriver

browser = webdriver.Firefox()

url = 'https://seedly.sg/reviews/p2p-lending/funding-societies'
browser.get(url)

star_count_list = []

rating_column = browser.find_elements_by_xpath('//div[contains(@class,"qr0ren-7 euifNX")]')

for row in rating_column:
    stars = row.find_elements_by_xpath('.//span[contains(@style,"width:100%")]')
    star_count_list.append(len(stars))

for i, e in enumerate(star_count_list, 1):
    print('{}. {}'.format(i, e))
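
A short follow-up sketch that summarises the collected counts; the list below is placeholder data standing in for star_count_list from the script above:

from collections import Counter

star_count_list = [5, 4, 5, 3, 5]  # placeholder data for illustration

counts = Counter(star_count_list)
for stars in sorted(counts, reverse=True):
    print('{} stars: {} reviews'.format(stars, counts[stars]))

print('average: {:.2f}'.format(sum(star_count_list) / len(star_count_list)))
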

__scraping__/shopee.tw/main.py

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
#!/usr/bin/env python3

# date: 2020.01.02
# https://stackoverflow.com/questions/59557071/how-can-i-crawl-the-product-items-from-shopee-website/59557626#59557626

# Without `Referer` it doesn't send the price

import requests

url = 'https://shopee.tw/api/v2/search_items/?by=pop&limit=30&match_id=1819984&newest=0&order=desc&page_type=shop&shop_categoryids=9271157&version=2'

headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0',
    'Referer': 'https://shopee.tw/shop/1819984/search?shopCollection=9271157',
    'X-Requested-With': 'XMLHttpRequest',
}

r = requests.get(url, headers=headers)

data = r.json()

#print(data['items'][0].keys())

for item in data['items']:
    print('name:', item['name'])
    print('price:', item['price'])
    print('---')
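
The same request can also be built with a params dict instead of the long hand-written query string; a sketch with the values copied from the URL above:

import requests

headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0',
    'Referer': 'https://shopee.tw/shop/1819984/search?shopCollection=9271157',
    'X-Requested-With': 'XMLHttpRequest',
}

params = {
    'by': 'pop',
    'limit': 30,
    'match_id': 1819984,
    'newest': 0,
    'order': 'desc',
    'page_type': 'shop',
    'shop_categoryids': 9271157,
    'version': 2,
}

r = requests.get('https://shopee.tw/api/v2/search_items/', params=params, headers=headers)

for item in r.json()['items']:
    print('name:', item['name'], 'price:', item['price'])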
