1+ import requests
2+ from bs4 import BeautifulSoup
3+
# Browser-like User-Agent sent with every request below.
# NOTE(review): presumably set because Reddit throttles/blocks the default
# python-requests UA — confirm this is still required.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}
5+
6+
def _parse_votes(raw):
    """Parse a vote-count string into an int, or return None if unparsable.

    Reddit abbreviates large counts (e.g. "12.5k"), which would make a bare
    int() raise ValueError; comma-grouped numbers ("1,234") would too.
    """
    text = str(raw).strip().lower().replace(',', '')
    try:
        if text.endswith('k'):
            return int(float(text[:-1]) * 1_000)
        if text.endswith('m'):
            return int(float(text[:-1]) * 1_000_000)
        return int(text)
    except ValueError:
        # Placeholder glyphs ("•", "Vote", ...) — signal "no usable count".
        return None


def extract_post(html, subreddit):
    """Extract one post's data from its BeautifulSoup element.

    Parameters:
        html: BeautifulSoup Tag for a single post container.
        subreddit: name of the subreddit the post came from (stored verbatim).

    Returns:
        dict with 'votes' (int), 'title', 'link', 'subreddit' keys, or None
        when any of the three pieces is missing or the vote count is
        unparsable — so one malformed post never aborts the caller's loop.
    """
    # The class names below are Reddit's obfuscated CSS classes; they are
    # runtime data and must not be altered.
    votes = html.find("div", {"class": "_1rZYMD_4xY3gRcSS3p8ODO"})
    if votes:
        votes = votes.string
    title = html.find("h3", {"class": "_eYtD2XCVieq6emjKBH3m"})
    if title:
        title = title.string
    link = html.find("a", {"class": "SQnoC3ObvgnGjWt90zD9Z _2INHSNB8V5eaWp4P0rY_mE"})
    if link:
        link = link['href']
    if not (votes and title and link):
        return None
    vote_count = _parse_votes(votes)
    if vote_count is None:
        return None
    return {'votes': vote_count, 'title': title, 'link': link, 'subreddit': subreddit}
21+
def scrape_subreddit(subreddit):
    """Scrape a subreddit's top posts of the past month.

    Parameters:
        subreddit: subreddit name (without the "r/" prefix).

    Returns:
        list of post dicts as produced by extract_post(); an empty (or
        partial) list when the request or parsing fails — this function is
        deliberately best-effort and never raises.
    """
    all_posts = []
    url = f"https://www.reddit.com/r/{subreddit}/top/?t=month"
    try:
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, "html.parser")
        # Reddit's obfuscated class for the post-list container.
        post_container = soup.find("div", {"class": "rpBJOHq2PR60pnwJlUyP0"})
        if post_container:
            # Direct children without a class attribute are the post elements.
            for post in post_container.find_all("div", {"class": None}, recursive=False):
                extracted_post = extract_post(post, subreddit)
                if extracted_post:
                    all_posts.append(extracted_post)
    except Exception:
        # Deliberate swallow: a failing subreddit contributes whatever posts
        # were parsed before the error instead of crashing the whole batch.
        # NOTE(review): consider logging here so failures aren't invisible.
        pass
    return all_posts
38+
def aggregate_subreddits(subreddits):
    """Scrape every subreddit in *subreddits* and return all posts in one list.

    Parameters:
        subreddits: iterable of subreddit names.

    Returns:
        single flat list, in input order; a subreddit that fails to scrape
        simply contributes no posts (scrape_subreddit returns [] on failure).
    """
    aggregated = []
    for subreddit in subreddits:
        # extend() in place rather than repeated `aggregated + posts`
        # concatenation, which rebuilds the list each iteration (quadratic).
        aggregated.extend(scrape_subreddit(subreddit))
    return aggregated
45+
46+
47+
def check_subreddit(to_check):
    """Return True if https://reddit.com/r/<to_check> answers with HTTP 200.

    Parameters:
        to_check: subreddit name (without the "r/" prefix).

    Returns:
        True on a 200 response; False on any other status or on any
        network error, so callers can use this as a simple existence probe.
    """
    try:
        check_request = requests.get(f"https://reddit.com/r/{to_check}", headers=headers)
    except Exception:
        # Network failure -> treat the subreddit as unavailable.
        return False
    # NOTE(review): requests follows redirects, so this is the status of the
    # final (www.) page; private/banned subreddits may return 403/404 —
    # confirm that matches caller expectations.
    return check_request.status_code == 200
# (removed stray "0 commit comments" web-page residue — not Python code)