Skip to content

Commit 1ad3086

Browse files
authored
Update summarize.py
1 parent cc8da27 commit 1ad3086

File tree

1 file changed

+53
-30
lines changed

1 file changed

+53
-30
lines changed

Python27/SummarizeNewsArticle/summarize.py

Lines changed: 53 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -2,37 +2,60 @@
22
import requests
33
import json
44
import urllib
5+
import random
6+
import urllib2
7+
8+
9+
10+
11+
class datos:
    """Fetch a news article via the Diffbot Article API and score its sentences.

    ``UserInput`` downloads the article behind ``link`` and returns its
    title, text and first image URL.  ``Summarize`` splits a text on "."
    and counts, per fragment, how many words carry one of the
    part-of-speech tags listed in ``DESCRIPTIVE_TAGS``.
    """

    # NOTE(review): this token was committed in plain text.  It is kept only
    # as a fallback so existing callers keep working; prefer passing
    # ``token=`` or setting the DIFFBOT_TOKEN environment variable, and
    # rotate this value.
    DEFAULT_TOKEN = "2aca4b94adb14d3c02619c02a3d22cac"
    API_ENDPOINT = "https://api.diffbot.com/v3/article"
    # Seconds to wait for the API; without a timeout requests can block forever.
    REQUEST_TIMEOUT = 30
    # Penn Treebank tags counted by Summarize: adverbs (RB*), adjectives (JJ*),
    # singular proper nouns (NNP) and plural nouns (NNS).
    DESCRIPTIVE_TAGS = frozenset(
        ['RB', 'RBR', 'RBS', 'JJ', 'JJS', 'JJR', 'NNP', 'NNS']
    )

    def __init__(self, link="", token=None):
        """Store the article URL and resolve the API token.

        Args:
            link: URL of the news article to fetch.
            token: Diffbot API token.  When omitted, the DIFFBOT_TOKEN
                environment variable is used, then ``DEFAULT_TOKEN``.
        """
        import os

        self.link = link
        self.token = token or os.environ.get("DIFFBOT_TOKEN") or self.DEFAULT_TOKEN

    @staticmethod
    def _first_image_url(article):
        """Return the URL of the article's first image, or None if it has none."""
        images = article.get('images') or []
        if not images:
            return None
        return images[0].get('url')

    def UserInput(self):
        """Download the article at ``self.link`` through the Diffbot API.

        Returns:
            A ``(title, text, image_url)`` tuple.  ``image_url`` is None when
            the response lists no image for the article.

        Raises:
            KeyError, IndexError: if the response holds no article object,
                or the article lacks a title or text.
        """
        # quote_plus lives in ``urllib`` on Python 2 and ``urllib.parse`` on 3.
        try:
            from urllib import quote_plus
        except ImportError:
            from urllib.parse import quote_plus

        # Instances built without __init__ (or by a subclass that skips it)
        # have no ``token`` attribute; fall back to the class default.
        token = getattr(self, "token", None) or self.DEFAULT_TOKEN
        encoded_link = quote_plus(str(self.link))
        request_url = self.API_ENDPOINT + "?token=" + token + "&url=" + encoded_link

        response = requests.get(request_url, timeout=self.REQUEST_TIMEOUT)
        data = json.loads(response.content.decode("UTF-8"))
        article = data['objects'][0]
        return article['title'], article['text'], self._first_image_url(article)

    def Summarize(self, data):
        """Score every "."-separated fragment of ``data``.

        Args:
            data: The article text.

        Returns:
            A ``(scores, sentences)`` tuple: ``sentences`` is the text split
            on "." and ``scores[i]`` is the number of words in
            ``sentences[i]`` tagged with one of ``DESCRIPTIVE_TAGS``.
        """
        sentences = TextBlob(data).split(".")
        # Retained from the original implementation: reports the fragment count.
        print(len(sentences))
        scores = [
            sum(
                1
                for _word, tag in TextBlob(sentence).tags
                if tag in datos.DESCRIPTIVE_TAGS
            )
            for sentence in sentences
        ]
        return scores, sentences
55+
56+
57+
558

659

7-
# Flat-script summarizer: fetch one article through the Diffbot Article API,
# count the descriptive words in every "."-separated fragment, then print
# the title followed by each fragment that scores above the threshold.

# quote_plus lives in ``urllib`` on Python 2 and ``urllib.parse`` on Python 3.
try:
    from urllib import quote_plus
except ImportError:
    from urllib.parse import quote_plus

# Enter the URL of the news article here.
s = "http://www.thehindu.com/todays-paper/tp-opinion/the-ai-battlefield/article20376166.ece"
s = quote_plus(s)  # encode the URL so it can be passed as a query parameter

# The Diffbot API is used to scrape only the text from the web page.
# Get your API key from their official website: https://www.diffbot.com/
# The token is kept in its own variable: embedding the quoted placeholder
# directly inside the URL literal terminated the string early (syntax error).
API_TOKEN = "Your API Token"
f = "https://api.diffbot.com/v3/article?token=" + API_TOKEN + "&url=" + s

# Penn Treebank tags that count towards a fragment's score: adverbs (RB*),
# adjectives (JJ*), singular proper nouns (NNP) and plural nouns (NNS).
DESCRIPTIVE_TAGS = frozenset(
    ['RB', 'RBR', 'RBS', 'JJ', 'JJS', 'JJR', 'NNP', 'NNS']
)
# A fragment is printed only when its score is strictly greater than this.
SCORE_THRESHOLD = 8

# A timeout keeps the script from hanging forever on an unresponsive API.
r = requests.get(f, timeout=30)
data = json.loads(r.content.decode("UTF-8"))
dd = data['objects'][0]['text']

dh = TextBlob(dd).split(".")
print(len(dh))

# ll[i] is the number of descriptive words found in dh[i].
ll = [
    sum(1 for _word, tag in TextBlob(fragment).tags if tag in DESCRIPTIVE_TAGS)
    for fragment in dh
]

print(data['objects'][0]['title'])

print("--------------")
for score, fragment in zip(ll, dh):
    if score > SCORE_THRESHOLD:
        print(fragment)
3760

3861

0 commit comments

Comments
 (0)