1 file changed: +12 -0 lines changed
Original file line number | Diff line number | Diff line change
@@ -73,8 +73,20 @@ def my_tokenizer(s):
73
73
74
74
def get_wikipedia_data (n_files , n_vocab , by_paragraph = False ):
75
75
prefix = '../large_files/'
76
+
77
+ if not os .path .exists (prefix ):
78
+ print "Are you sure you've downloaded, converted, and placed the Wikipedia data into the proper folder?"
79
+ print "I'm looking for a folder called large_files, adjacent to the class folder, but it does not exist."
80
+ print "Please download the data from https://dumps.wikimedia.org/"
81
+ exit ()
82
+
76
83
input_files = [f for f in os .listdir (prefix ) if f .startswith ('enwiki' ) and f .endswith ('txt' )]
77
84
85
+ if len (input_files ) == 0 :
86
+ print "Looks like you don't have any data files, or they're in the wrong location."
87
+ print "Please download the data from https://dumps.wikimedia.org/"
88
+ print "Quitting..."
89
+
78
90
# return variables
79
91
sentences = []
80
92
word2idx = {'START' : 0 , 'END' : 1 }
You can’t perform that action at this time.
0 commit comments