# test.py
# Authors: Annie Lin, Joanne Koong
# Date: December 7, 2015
# Emails: annielin@college.harvard.edu, joannekoong@college.harvard.edu
# ----------------
# Exercises the sentence generators in markov.py and reports basic stats
# (running time, readability index, word count) for each approach.
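#
# Example run (a sketch; assumes scrapy and textstat are installed, and that
# markov.py and stackoverflow_spider.py sit alongside this script):
#
#   $ python test.py
#   Number of sentences: 3
#   Tag (between quotes): "python"
#
# The spider writes scraped answers to ~/Desktop/bobo.json, which the
# generators below read and then delete.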
import markov
import os
import random
import time
from subprocess import call
from textstat.textstat import textstat

# Scraped answers are written here by the spider and read back by markov.py.
JSON_PATH = os.path.expanduser("~/Desktop/bobo.json")

# Silence scrapy's console output when running the spider.
devnull = open(os.devnull, 'w')

def search(num_sentences, tag):
    # Scrape fresh StackOverflow answers for the tag into the JSON file.
    if os.path.isfile(JSON_PATH):
        os.remove(JSON_PATH)
    call(["scrapy", "runspider", "stackoverflow_spider.py", "-a", "tag=%s" % tag, "-o", JSON_PATH],
         stdout=devnull, stderr=devnull)
    with open(JSON_PATH) as f:
        text = f.read()
    sentences = ''
    # Build output from random sentences of answers related to the tag.
    for i in range(num_sentences):
        s = markov.Search()
        ans = str(s.searching(JSON_PATH))
        sent_list = ans.split('.')
        rando = random.randint(0, len(sent_list) - 1)
        sentence = sent_list[rando]
        if ans:
            # Collapse repeated whitespace into single spaces.
            sentence = " ".join(sentence.split())
            sentences += sentence + '. '
        else:
            sentences += 'Recompile.'
    os.remove(JSON_PATH)
    return sentences

def markovWeighting(num_sentences, tag):
    # Same scrape-and-generate loop, driven by markov.Markov.
    if os.path.isfile(JSON_PATH):
        os.remove(JSON_PATH)
    call(["scrapy", "runspider", "stackoverflow_spider.py", "-a", "tag=%s" % tag, "-o", JSON_PATH],
         stdout=devnull, stderr=devnull)
    with open(JSON_PATH) as f:
        text = f.read()
    sentences = ''
    # Build output from random sentences of answers related to the tag.
    for i in range(num_sentences):
        s = markov.Markov()
        ans = str(s.marking(JSON_PATH))
        sent_list = ans.split('.')
        rando = random.randint(0, len(sent_list) - 1)
        sentence = sent_list[rando]
        if ans:
            # Collapse repeated whitespace into single spaces.
            sentence = " ".join(sentence.split())
            sentences += sentence + '. '
        else:
            sentences += 'Recompile.'
    os.remove(JSON_PATH)
    return sentences

def mmarkov(num_sentences, tag):
    # Same scrape-and-generate loop, driven by markov.MarkovWeighting.
    if os.path.isfile(JSON_PATH):
        os.remove(JSON_PATH)
    call(["scrapy", "runspider", "stackoverflow_spider.py", "-a", "tag=%s" % tag, "-o", JSON_PATH],
         stdout=devnull, stderr=devnull)
    with open(JSON_PATH) as f:
        text = f.read()
    sentences = ''
    # Build output from random sentences of answers related to the tag.
    for i in range(num_sentences):
        s = markov.MarkovWeighting()
        ans = str(s.marking(JSON_PATH))
        sent_list = ans.split('.')
        rando = random.randint(0, len(sent_list) - 1)
        sentence = sent_list[rando]
        if ans:
            # Collapse repeated whitespace into single spaces.
            sentence = " ".join(sentence.split())
            sentences += sentence + '. '
        else:
            sentences += 'Recompile.'
    os.remove(JSON_PATH)
    return sentences

if __name__ == "__main__":
    # Python 2 input() evaluates what is typed, hence the quotes around the tag.
    sentences = int(input('Number of sentences: '))
    tag = str(input('Tag (between quotes): '))

    start_time = time.time()
    search_result = search(sentences, tag)
    print "1. Search: " + search_result
    print "Search Stats: \n Running Time: %s \n Readability Index: %s \n Word Count: %s " % (str(time.time() - start_time),
        str(textstat.automated_readability_index(search_result)),
        str(textstat.lexicon_count(search_result)))

    start_time = time.time()
    markov_result = mmarkov(sentences, tag)
    # markov_result = markovWeighting(sentences, tag)
    print "2. Markov: " + markov_result
    print "Markov Stats: \n Running Time: %s \n Readability Index: %s \n Word Count: %s " % (str(time.time() - start_time),
        str(textstat.automated_readability_index(markov_result)),
        str(textstat.lexicon_count(markov_result)))

    start_time = time.time()
    mw_result = markovWeighting(sentences, tag)
    # mw_result = mmarkov(sentences, tag)
    print "3. Markov Weighting: " + mw_result
    print "Markov Weighting Stats: \n Running Time: %s \n Readability Index: %s \n Word Count: %s " % (str(time.time() - start_time),
        str(textstat.automated_readability_index(mw_result)),
        str(textstat.lexicon_count(mw_result)))