Skip to content

Commit ee0fdf9

Browse files
add hints for data
1 parent 63c38d2 commit ee0fdf9

File tree

1 file changed

+12
-0
lines changed

1 file changed

+12
-0
lines changed

rnn_class/util.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,20 @@ def my_tokenizer(s):
73 73

74 74
def get_wikipedia_data(n_files, n_vocab, by_paragraph=False):
75 75
prefix = '../large_files/'
76+
77+
if not os.path.exists(prefix):
78+
print "Are you sure you've downloaded, converted, and placed the Wikipedia data into the proper folder?"
79+
print "I'm looking for a folder called large_files, adjacent to the class folder, but it does not exist."
80+
print "Please download the data from https://dumps.wikimedia.org/"
81+
exit()
82+
76 83
input_files = [f for f in os.listdir(prefix) if f.startswith('enwiki') and f.endswith('txt')]
77 84

85+
if len(input_files) == 0:
86+
print "Looks like you don't have any data files, or they're in the wrong location."
87+
print "Please download the data from https://dumps.wikimedia.org/"
88+
print "Quitting..."
89+
78 90
# return variables
79 91
sentences = []
80 92
word2idx = {'START': 0, 'END': 1}

0 commit comments

Comments
 (0)