|
2 | 2 | import requests |
3 | 3 | import json |
4 | 4 | import urllib |
| 5 | +import random |
| 6 | +import urllib2 |
| 7 | + |
| 8 | + |
| 9 | + |
| 10 | + |
class datos:
    """Fetch a news article through the Diffbot Article API and score its sentences.

    ``UserInput`` downloads the article behind ``link`` and returns its title,
    text and lead image URL.  ``Summarize`` splits a text on ``"."`` and counts,
    per piece, how many words carry one of the part-of-speech tags listed in
    ``DESCRIPTIVE_TAGS``.
    """

    API_URL = "https://api.diffbot.com/v3/article"

    # NOTE(security): this token was committed in the original source and is
    # kept only as a backward-compatible default.  Prefer passing ``token=``
    # to the constructor and rotate this value.
    DEFAULT_TOKEN = "2aca4b94adb14d3c02619c02a3d22cac"

    # Seconds to wait for Diffbot before giving up; without a timeout
    # ``requests.get`` can block indefinitely.
    REQUEST_TIMEOUT = 30

    # Tags that make a word count towards a sentence's score (these look like
    # Penn Treebank adverb / adjective / proper-noun / plural-noun tags).
    DESCRIPTIVE_TAGS = frozenset(
        ['RB', 'RBR', 'RBS', 'JJ', 'JJS', 'JJR', 'NNP', 'NNS']
    )

    def __init__(self, link="", token=None):
        """Store the article URL and the API token to use.

        Args:
            link: URL of the article to fetch.
            token: Diffbot API token; falls back to ``DEFAULT_TOKEN`` when
                omitted.
        """
        self.link = link
        self.token = token if token is not None else self.DEFAULT_TOKEN

    def UserInput(self):
        """Download the article at ``self.link`` via Diffbot.

        Returns:
            A ``(title, text, image_url)`` tuple taken from the first entry of
            the response's ``objects`` list.  ``image_url`` is ``None`` when
            the article has no images.

        Raises:
            KeyError, IndexError: if the response has no ``objects`` entry or
                that entry lacks ``title`` / ``text``.
            requests.exceptions.RequestException: on network failure or
                timeout.
        """
        # Let requests build and percent-encode the query string instead of
        # concatenating it by hand.
        response = requests.get(
            self.API_URL,
            params={"token": self.token, "url": str(self.link)},
            timeout=self.REQUEST_TIMEOUT
        )
        data = json.loads(response.content.decode("UTF-8"))
        article = data['objects'][0]

        # Not every article has an image; do not fail the whole fetch for it.
        images = article.get('images') or []
        image_url = images[0].get('url') if images else None

        return article['title'], article['text'], image_url

    def Summarize(self, data):
        """Score each "."-separated piece of ``data``.

        Args:
            data: article text.

        Returns:
            A ``(scores, sentences)`` tuple: ``sentences`` is the result of
            splitting the text on ``"."`` and ``scores[i]`` is the number of
            words in ``sentences[i]`` tagged with one of ``DESCRIPTIVE_TAGS``.
        """
        sentences = TextBlob(data).split(".")
        # Kept from the original implementation: reports the sentence count.
        print(len(sentences))
        scores = [
            self._count_descriptive(TextBlob(sentence).tags)
            for sentence in sentences
        ]
        return scores, sentences

    @classmethod
    def _count_descriptive(cls, tags):
        """Return how many ``(word, tag)`` pairs have a tag in ``DESCRIPTIVE_TAGS``."""
        return sum(1 for pair in tags if pair[1] in cls.DESCRIPTIVE_TAGS)
| 55 | + |
| 56 | + |
| 57 | + |
5 | 58 |
|
6 | 59 |
|
# Print the title of a news article followed by its most "descriptive"
# sentences: those containing more than 8 adverbs, adjectives, proper nouns
# or plural nouns according to TextBlob's part-of-speech tags.

# Enter the URL of the news article here.
s = "http://www.thehindu.com/todays-paper/tp-opinion/the-ai-battlefield/article20376166.ece"
s = urllib.quote_plus(s)  # encode the URL so it can be passed as a query parameter

# The Diffbot API is used to scrape only the text from the website page.
# Get your API key from their official website: https://www.diffbot.com/
token = "Your API Token"
f = "https://api.diffbot.com/v3/article?token=" + token + "&url=" + s

r = requests.get(f)
data = json.loads(r.content.decode("UTF-8"))
dd = data['objects'][0]['text']
blob = TextBlob(dd)
dh = blob.split(".")
print(len(dh))

# Part-of-speech tags that make a word count towards a sentence's score.
wanted = frozenset(['RB', 'RBR', 'RBS', 'JJ', 'JJS', 'JJR', 'NNP', 'NNS'])

# ll[i] is the number of wanted-tag words in sentence dh[i].
ll = []
for lines in dh:
    qq = TextBlob(lines).tags
    ll.append(sum(1 for pair in qq if pair[1] in wanted))

print(data['objects'][0]['title'])

print("--------------")
for count, sentence in zip(ll, dh):
    if count > 8:
        print(sentence)
37 | 60 |
|
38 | 61 |
|
0 commit comments