-
Notifications
You must be signed in to change notification settings - Fork 0
/
childone.py
65 lines (60 loc) · 1.89 KB
/
childone.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
'''
Helper module imported by parent.py: scrapes the Game of Thrones wiki and
picks a random entity together with a sentence that mentions it.
'''
import random, re
import requests, urllib2
from bs4 import BeautifulSoup
def scrap_wiki_doc(thing):
    '''
    Fetch the Game of Thrones wiki article for `thing` and return its text.

    Spaces in `thing` are replaced with underscores to build the article
    path under http://gameofthrones.wikia.com/wiki/. The page is parsed
    with BeautifulSoup (lxml parser) and only the element whose id is
    "mw-content-text" is kept.

    Returns the text of that element as produced by get_text().
    Exceptions raised by urllib2 while opening the URL propagate to the
    caller.
    '''
    # "Jon Snow" -> "Jon_Snow"; same result as splitting on ' ' and
    # re-joining with '_'.
    tail = thing.replace(' ', '_')
    site = 'http://gameofthrones.wikia.com/wiki/' + tail
    # The request is sent with a Mozilla User-Agent header.
    hdr = {'User-Agent': 'Mozilla/5.0'}
    req = urllib2.Request(site, headers=hdr)
    page = urllib2.urlopen(req)
    try:
        soup = BeautifulSoup(page, "lxml")
    finally:
        # Release the HTTP response even if parsing fails.
        page.close()
    content = soup.find(id="mw-content-text")
    # NOTE(review): find() yields None when no such element exists, in which
    # case the next line raises AttributeError -- confirm callers accept that.
    return content.get_text()
def most_relevant_1(title):
    '''
    Return a random line of the wiki article for `title` that mentions it.

    The article text comes from scrap_wiki_doc(title) and is split on
    newlines. A line qualifies when the first space-separated word of the
    lower-cased title occurs in the lower-cased line more than once and
    fewer than eight times. One qualifying line is chosen at random and
    returned with bracketed spans such as "[1]" removed.

    Returns None when no line qualifies.
    '''
    article = scrap_wiki_doc(title)
    # Only the first word of the title is used as the search keyword.
    keyword = title.lower().split(' ')[0]
    lines = article.split('\n')
    candidates = [
        idx for idx, line in enumerate(lines)
        if 1 < line.lower().count(keyword) < 8
    ]
    if not candidates:
        return None
    chosen = lines[random.choice(candidates)]
    # Strip "[...]" spans (non-greedy, at least one character inside).
    return re.sub(r'\[.+?\]', '', chosen)
def open_txt_file(filename):
    '''
    Read an entity list file and return its lines.

    The file is looked up first in ./entities/ and then in ../entities/,
    both relative to the current working directory. Lines are returned as
    given by readlines(), i.e. with their trailing newlines.

    If the file cannot be opened or read from either location, a
    one-element list holding an error message is returned instead of
    raising. Only I/O errors are treated this way; other exceptions
    propagate so that programming mistakes are not hidden.
    '''
    for directory in ('./entities/', '../entities/'):
        try:
            with open(directory + filename) as f:
                return f.readlines()
        except (IOError, OSError):
            # Not available here; try the next candidate directory.
            continue
    return ['Exception Error: entity tag file not found']
def get_random(filename):
    '''
    Return "<title> : <sentence>" for a randomly chosen entity.

    A random line of `filename` (read through open_txt_file) is used as the
    title, and most_relevant_1 supplies a sentence for it. When no sentence
    is found, the search is retried with a file picked at random from
    houses.txt, castles.txt and characters.txt until one succeeds.

    The returned title is always the one the sentence was found for; a
    title that produced no sentence is never prefixed to another entity's
    result.
    '''
    fallback_files = ['houses.txt', 'castles.txt', 'characters.txt']
    while True:
        lines = open_txt_file(filename)
        # Strip only the line terminator, so a final line without a trailing
        # newline keeps its last character.
        title = random.choice(lines).rstrip('\r\n')
        sentence = most_relevant_1(title)
        if sentence is not None:
            return title + ' : ' + sentence
        filename = random.choice(fallback_files)