-
Notifications
You must be signed in to change notification settings - Fork 1
/
CuriosityTrendingparser.py
42 lines (36 loc) · 1.4 KB
/
CuriosityTrendingparser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import requests
from bs4 import BeautifulSoup
class TrendingParser:
    """Collect article links from the curiosity.com daily trending page.

    Instances only store the three link collections handed to the
    constructor; the scraping itself is done by the static method
    ``change_href``, which does not touch instance state.
    """

    # Defaults used by change_href() when no explicit value is passed.
    DB_PATH = 'C:/Users/Елена/PycharmProjects/curiosity-to-vk/my_href.db'
    TO_POST_PATH = 'C:/Users/Елена/PycharmProjects/curiosity-to-vk/href-to-post.db'
    TRENDING_URL = "https://curiosity.com/trending/day/"
    LINK_PREFIX = 'http://curiosity.com'

    def __init__(self, in_db, new, to_post):
        """Store the three link collections unchanged on the instance."""
        self.in_db = in_db
        self.new = new
        self.to_post = to_post

    @staticmethod
    def _unseen(candidates, known):
        """Return the items of *candidates* absent from *known*.

        Order of first appearance is kept and each item is returned once,
        so the result is deterministic (unlike iterating a set difference).
        """
        seen = set(known)
        fresh = []
        for link in candidates:
            if link not in seen:
                seen.add(link)
                fresh.append(link)
        return fresh

    @staticmethod
    def change_href(path_my_href=None, path_to_post=None, timeout=30):
        """Fetch the trending page and record links that are not yet known.

        Reads the known links (one per line) from *path_my_href*, downloads
        the trending page and builds an absolute link for every
        ``<a class="trending-grid-item">`` element that has an ``href``.
        Links not already known are appended to *path_my_href*, and the
        same links overwrite *path_to_post*. When there are no new links
        neither file is modified.

        Args:
            path_my_href: File holding the already-seen links; defaults to
                ``TrendingParser.DB_PATH``.
            path_to_post: File that receives the new links; defaults to
                ``TrendingParser.TO_POST_PATH``.
            timeout: Seconds passed to ``requests.get`` so a stalled
                connection cannot block forever.

        Returns:
            A tuple of three lists ``(href_in_db, href_new, href_to_post)``:
            the links read from the database, every link found on the page
            (duplicates included), and the links that were new.

        Raises:
            requests.exceptions.RequestException: If the page cannot be
                fetched (connection failure or timeout).
        """
        if path_my_href is None:
            path_my_href = TrendingParser.DB_PATH
        if path_to_post is None:
            path_to_post = TrendingParser.TO_POST_PATH

        # A missing database means nothing has been recorded yet (first
        # run); treat it as empty instead of failing before it is created.
        try:
            with open(path_my_href) as database:
                href_in_db = [line.replace('\n', '') for line in database]
        except FileNotFoundError:
            href_in_db = []

        response = requests.get(TrendingParser.TRENDING_URL, timeout=timeout)
        soup = BeautifulSoup(response.text, 'lxml')

        href_new = []
        for item in soup.find_all('a', {'class': 'trending-grid-item'}):
            href = item.get('href')
            if href is None:
                # An anchor without href cannot be turned into a link.
                continue
            href_new.append(TrendingParser.LINK_PREFIX + href)

        href_to_post = TrendingParser._unseen(href_new, href_in_db)

        if href_to_post:
            lines = [link + '\n' for link in href_to_post]
            # Remember the new links so later runs skip them ...
            with open(path_my_href, 'a') as known_file:
                known_file.writelines(lines)
            # ... and replace the pending list with this run's links.
            with open(path_to_post, 'w') as pending_file:
                pending_file.writelines(lines)

        return href_in_db, href_new, href_to_post
# Module-level placeholder; nothing in the visible code reads it.
x = 0


def main():
    """Script entry point: print the marker line."""
    print("refresh_links")


if __name__ == "__main__":
    main()