-
Notifications
You must be signed in to change notification settings - Fork 0
/
readnews.py
113 lines (86 loc) · 3.16 KB
/
readnews.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/python
# -*- coding: utf-8 -*-
########
# Listen to the local news on ChronicleLive
# Copyright 2016 Paul Ashton
#
# Uses gtts library from https://github.com/pndurette/gTTS
########
import re
import time
# Third-party dependencies: fail with a readable hint instead of a traceback
# when one of them is not installed.
try:
    import requests
    from gtts import gTTS
    from pygame import mixer
except ImportError:
    # Only a missing/broken package is handled here; anything else
    # (e.g. Ctrl+C during import) propagates normally.
    print("Can't continue. The following packages are required: requests, gtts, pygame.")
    # Non-zero status so shells and schedulers can tell the run failed.
    raise SystemExit(1)
class readnews(object):
    """Scrape the ChronicleLive top stories, convert them to speech and play them.

    The pipeline run by :meth:`readNews` is: download the news page, pull
    (headline, strapline) pairs out of its "topStories" section, clean the
    text up for a speech engine, save it as an MP3 via gTTS and play that
    file with pygame's mixer.
    """

    # Ordered (pattern, replacement) pairs applied by DoFixes().  The site
    # mixes several encodings for the same character, so numeric entities are
    # matched with optional leading zeros (both "&#39;" and "&#039;").
    # Order matters: "&amp;" is decoded before "&quot;" exactly as before.
    _FIXES = (
        (r'&#0*1[03];', ''),                 # encoded CR / LF
        (r'&nbsp;', ''),
        (r'&#0*39;', '\''),                  # apostrophe, either spelling
        (r'&#0*37;', '%'),
        (r'&#0*63;', '?'),
        (r'&#0*40;', '('),
        (r'&#0*41;', ')'),
        (r'&amp;', '&'),
        (r'L-R', 'Left-to-Right,'),
        (r'&quot;', '\''),
        # Pounds: move the currency after the amount so it is spoken
        # naturally.  Accepts plain digits, thousands separators, an optional
        # decimal part and an optional "m" (million) suffix.
        (r'(?:\xa3|£)(\d+(?:,\d{3})*(?:\.\d+)?m?)', r'\1 (POUNDS)'),
        # Quotes: announce quoted passages.  A quote character that sits
        # between word characters is an apostrophe ("don't", "John's") and
        # must neither open nor close a quotation.
        (r'(?<!\w)[\'\"](.*?)[\'\"](?!\w)', r'(QUOTE) \1 (UNQUOTE)'),
    )

    def __init__(self):
        # Page whose "topStories" section is scraped.
        self.NEWSURL = "http://www.chroniclelive.co.uk/news/north-east-news/"
        # How many articles readNews() requests.
        self.NUMARTICLES = 3

    def DoFixes(self, text):
        """Return *text* cleaned up for text-to-speech.

        Decodes the HTML entities listed in ``_FIXES``, expands "L-R",
        rewrites pound amounts as ``<amount> (POUNDS)``, wraps quoted
        passages in ``(QUOTE) ... (UNQUOTE)`` and strips surrounding
        whitespace.
        """
        for pattern, replacement in self._FIXES:
            text = re.sub(pattern, replacement, text)
        return text.strip()

    def _parseArticles(self, html, numarticles):
        """Extract up to *numarticles* (headline, strapline) tuples from *html*.

        Only the ``topStories`` section is examined.  Returns an empty list
        when that section is absent; teasers lacking a headline or a
        strapline are skipped instead of aborting the whole parse.
        """
        section = re.search(r'(?s)<section data-group="topStories">(.+?)</section>', html)
        if section is None:
            return []

        articles = []
        for teaser in re.findall(r'(?s)(<div class="teaser">.+?</div></div></div>)', section.group()):
            if len(articles) >= numarticles:
                break
            headline = re.search(r'(?s)<strong><a href.+?>(.+?)</a></strong>', teaser)
            strapline = re.search(r'(?s)<div class="description"><a href.+?>(.+?)</a></div>', teaser)
            if headline is None or strapline is None:
                continue  # markup differs from what we expect; ignore this teaser
            articles.append((headline.group(1), strapline.group(1)))
        return articles

    def getArticles(self, numarticles=10):
        """Download the news page and return its top stories.

        Args:
            numarticles: maximum number of articles to return.

        Returns:
            A list of ``(headline, strapline)`` tuples of raw page text.

        Raises:
            SystemExit: if the page cannot be fetched or no article is found.
        """
        print("Retrieving news...")
        try:
            # A timeout stops the script hanging forever on a stalled server.
            html = requests.get(self.NEWSURL, timeout=30).text
        except requests.RequestException:
            # Network failure is reported through the "no news" path below.
            html = ""

        articles = self._parseArticles(html, numarticles)
        if not articles:
            raise SystemExit("Error: Could not find any news - it is possible that the site is down")
        return articles

    def buildTopStories(self, articlelist):
        """Return the text to be spoken for *articlelist*.

        *articlelist* is a sequence of ``(headline, strapline)`` pairs; each
        part is passed through :meth:`DoFixes` and numbered from 1.
        """
        parts = ["Top {0} news articles from Chronicle Live:\n".format(len(articlelist))]
        for number, article in enumerate(articlelist, 1):
            headline = self.DoFixes(article[0])
            strapline = self.DoFixes(article[1])
            parts.append(u"Article {0}:\n{1}\n{2}\n\n\n".format(number, headline, strapline))
        return "".join(parts)

    def textToMP3(self, text, filename="temp.mp3"):
        """Convert *text* to English speech with gTTS and save it as *filename*."""
        print("Converting to MP3..")
        tts = gTTS(text=text, lang='en')
        tts.save(filename)

    def playAndWait(self, filename="temp.mp3"):
        """Play *filename* with pygame's mixer and block until it finishes.

        Ctrl+C ends the wait early.  Returns True in either case.
        """
        print("Playing.. (ctrl+c to stop)")
        mixer.init()
        mixer.music.load(filename)
        mixer.music.play()
        try:
            # Poll once a second; playback itself runs in the background.
            while mixer.music.get_busy():
                time.sleep(1)
        except KeyboardInterrupt:
            print("Stopped by user.")
        return True

    def readNews(self):
        """Run the whole fetch -> text -> MP3 -> playback pipeline.

        Prints the text that will be spoken.  Always returns False (kept
        as-is for compatibility; no caller in this file uses the value).
        """
        articles = self.getArticles(self.NUMARTICLES)
        news = self.buildTopStories(articles)
        print(news)
        self.textToMP3(news)
        self.playAndWait()
        print("All done!")
        return False
# Script entry point: build a reader and run the full pipeline once.
if __name__ == '__main__':
    readnews().readNews()