File tree Expand file tree Collapse file tree 5 files changed +130
-0
lines changed
msih.bgu.ac.il - requests BS Expand file tree Collapse file tree 5 files changed +130
-0
lines changed Original file line number Diff line number Diff line change
1
+ #!/usr/bin/env python3
2
+
3
+ # date: 2019.12.30
4
+ # https://stackoverflow.com/questions/59535798/python-webscraping-with-beautifulsoup-not-displaying-full-content/59536553#59536553
5
+
6
+ import requests
7
+ from bs4 import BeautifulSoup
8
+
9
FOREX_URL = "https://www.forexfactory.com/#detail=108867"


def cell_text(cell):
    """Return the whitespace-stripped text of a table cell.

    `cell` is whatever `row.find(...)` returned; when the cell was not
    found (`None`) an empty string is returned instead of raising
    AttributeError.
    """
    if cell is None:
        return ""
    return cell.get_text(strip=True)


def main():
    """Print currency, actual and forecast for every grey calendar row.

    Raises requests.HTTPError on a non-2xx response and exits with a
    message when the calendar table is not present in the HTML.
    """
    # NOTE: the page uses JavaScript to redirect, so a browser may show
    # different results than this plain HTTP request.
    r = requests.get(FOREX_URL, timeout=30)
    r.raise_for_status()

    soup = BeautifulSoup(r.text, 'lxml')

    table = soup.find("table", class_="calendar__table")
    if table is None:
        # Without this guard the loop below fails with an opaque
        # "'NoneType' object has no attribute 'find_all'".
        raise SystemExit("calendar table not found in the response")

    for row in table.find_all('tr', class_='calendar__row--grey'):
        currency = cell_text(row.find("td", class_="currency"))
        actual = cell_text(row.find("td", class_="actual"))
        forecast = cell_text(row.find("td", class_="forecast"))

        print(currency, actual, forecast)


if __name__ == "__main__":
    main()
29
+
30
+
Original file line number Diff line number Diff line change
1
+ #!/usr/bin/env python3
2
+
3
+ # date: 2020.01.01
4
+ # https://stackoverflow.com/questions/59551193/i-want-to-download-images-from-python-what-should-i-do/
5
+
6
+ from selenium import webdriver
7
+ import requests
8
+
9
+ #path = r"C:\Users\qpslt\Desktop\py\chromedriver_win32\chromedriver.exe"
10
+ #driver = webdriver.Chrome(path)
11
PAGE_URL = "https://gall.dcinside.com/board/view/?id=baseball_new8&no=10131338&exception_mode=recommend&page=1"


def main():
    """Download every image found in the post body to c:/test/<n>.jpg.

    Image URLs are collected with a Firefox WebDriver session, which is
    always closed before the downloads start. Each image is requested
    with the post URL as `Referer`. Images without a `src` or with a
    failed download are reported and skipped; numbering still follows
    the position of the image on the page.
    """
    # Local import: the `find_elements_by_xpath` helper is deprecated in
    # Selenium 4 and removed in later 4.x releases; `By` locators work in
    # both old and new versions.
    from selenium.webdriver.common.by import By

    driver = webdriver.Firefox()
    try:
        driver.get(PAGE_URL)
        images = driver.find_elements(
            By.XPATH, '//div[@class="writing_view_box"]//img'
        )
        # Read the attributes while the session is still alive.
        img_urls = [img.get_attribute('src') for img in images]
    finally:
        # Close the browser even when loading or locating fails.
        driver.quit()

    for i, img_url in enumerate(img_urls, 1):
        print(i, img_url)

        if not img_url:
            print(i, 'skipped: image has no src')
            continue

        r = requests.get(img_url, headers={'Referer': PAGE_URL}, timeout=30)
        if not r.ok:
            # Do not save an error page under a .jpg name.
            print(i, 'skipped: HTTP status', r.status_code)
            continue

        with open("c:/test/{}.jpg".format(i), 'wb') as f:
            f.write(r.content)


if __name__ == "__main__":
    main()
26
+
Original file line number Diff line number Diff line change
1
+ #!/usr/bin/env python3
2
+
3
+ # date: 2020.01.02
4
+ # ???
5
+
6
+ import urllib .request
7
+ import bs4 as bs
8
+
9
PLACEMENTS_URL = 'https://msih.bgu.ac.il/md-program/residency-placements/'


def grad_year(title):
    """Return the last four characters of `title`, ignoring outer whitespace.

    Presumably the heading ends with the graduation year (the caller
    stores the result in `grad_yr_list`) -- verify against the page.
    Stripping first keeps a trailing newline or space from ending up in
    the key.
    """
    return title.strip()[-4:]


def main():
    """Print the type, key and first <h4> text of each accordion section.

    Every `div.accord-head` is paired positionally with a
    `div.accord-con`; the key is derived from the head's <h2> text.
    """
    # `with` closes the HTTP response once the body has been read.
    with urllib.request.urlopen(PLACEMENTS_URL, timeout=30) as response:
        sauce = response.read()
    soup = bs.BeautifulSoup(sauce, 'lxml')

    headers = soup.find_all('div', class_='accord-head')
    rez_classes = soup.find_all('div', class_='accord-con')

    data_dict = {}
    for header, rez_class in zip(headers, rez_classes):
        if header.h2 is None:
            # Skip the whole pair so later heads stay aligned with
            # their content blocks.
            continue
        data_dict[grad_year(header.h2.text)] = rez_class

    for key, value in data_dict.items():
        first_h4 = value.find('h4')
        h4_text = first_h4.text if first_h4 is not None else ''
        print(type(value), key, h4_text)


if __name__ == "__main__":
    main()
23
+
Original file line number Diff line number Diff line change
1
+ #!/usr/bin/env python3
2
+
3
+ # date: 2020.01.02
4
+ # ???
5
+
6
+ # count stars
7
+
8
+ from selenium import webdriver
9
+
10
REVIEWS_URL = 'https://seedly.sg/reviews/p2p-lending/funding-societies'


def format_counts(counts):
    """Return one '<position>. <count>' string per entry, numbered from 1."""
    return ['{}. {}'.format(i, e) for i, e in enumerate(counts, 1)]


def main():
    """Print, for each rating block on the page, how many stars are full.

    A star is counted when its <span> style contains "width:100%".
    """
    # Local import: the `find_elements_by_xpath` helper is deprecated in
    # Selenium 4 and removed in later 4.x releases; `By` locators work in
    # both old and new versions.
    from selenium.webdriver.common.by import By

    browser = webdriver.Firefox()
    try:
        browser.get(REVIEWS_URL)

        # NOTE(review): these class names look auto-generated and may
        # change when the site is rebuilt -- confirm before relying on them.
        rating_column = browser.find_elements(
            By.XPATH, '//div[contains(@class,"qr0ren-7 euifNX")]'
        )

        star_count_list = []
        for row in rating_column:
            stars = row.find_elements(
                By.XPATH, './/span[contains(@style,"width:100%")]'
            )
            star_count_list.append(len(stars))
    finally:
        # Close the browser even when loading or locating fails.
        browser.quit()

    for line in format_counts(star_count_list):
        print(line)


if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
1
+ #!/usr/bin/env python3
2
+
3
+ # date: 2020.01.02
4
+ # https://stackoverflow.com/questions/59557071/how-can-i-crawl-the-product-items-from-shopee-website/59557626#59557626
5
+
6
+ # Without `Referer` it doesn't send price
7
+
8
+ import requests
9
+
10
SEARCH_URL = 'https://shopee.tw/api/v2/search_items/?by=pop&limit=30&match_id=1819984&newest=0&order=desc&page_type=shop&shop_categoryids=9271157&version=2'

REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0',
    # Without `Referer` the API does not send the price.
    'Referer': 'https://shopee.tw/shop/1819984/search?shopCollection=9271157',
    'X-Requested-With': 'XMLHttpRequest',
}


def item_lines(item):
    """Return the output lines for one item: name, price and a separator.

    Raises KeyError when `item` lacks the 'name' or 'price' key.
    """
    return [
        'name: {}'.format(item['name']),
        'price: {}'.format(item['price']),  # label was misspelled 'prince:'
        '---',
    ]


def main():
    """Fetch the shop's search results and print name and price per item.

    Raises requests.HTTPError on a non-2xx response.
    """
    r = requests.get(SEARCH_URL, headers=REQUEST_HEADERS, timeout=30)
    r.raise_for_status()

    data = r.json()

    # 'items' may be absent or null; treat both as "no items" instead of
    # failing with KeyError/TypeError.
    for item in data.get('items') or []:
        for line in item_lines(item):
            print(line)


if __name__ == "__main__":
    main()
You can’t perform that action at this time.
0 commit comments