1
- local table = require ' table'
2
- local math = require ' math'
3
- local torch = require ' torch'
1
+ require ' table'
2
+ require ' math'
3
+ require ' torch'
4
+ require ' encoding'
5
+
6
+ local batcher = {}
4
7
5
8
-- function to read the text file
6
9
function load_text ()
@@ -11,35 +14,6 @@ function load_text()
11
14
return text
12
15
end
13
16
14
- -- function to create a character-based vocabulary with a unique id for every
15
- -- character and return the vocabulary along with encoded dataset based on the
16
- -- ids.
17
- function char_to_ints (text )
18
- local alphabet = {}
19
- local encoded = torch .Tensor (# text )
20
-
21
- for i = 1 , # text do
22
- local c = text :sub (i , i )
23
- if alphabet [c ] == nil then
24
- alphabet [# alphabet + 1 ] = c
25
- alphabet [c ] = # alphabet
26
- end
27
- encoded [i ] = alphabet [c ]
28
- end
29
-
30
- return alphabet , encoded
31
- end
32
-
33
- -- function for one-hot encoding
34
- function ints_to_one_hot (ints , width )
35
- local height = ints :size ()[1 ]
36
- local zeros = torch .zeros (height , width )
37
- local indices = ints :view (- 1 , 1 ):long ()
38
- local one_hot = zeros :scatter (2 , indices , 1 )
39
-
40
- return one_hot
41
- end
42
-
43
17
function make_chunk_iterator (encoded_text , indices , chunk_size , n_symbols )
44
18
function co ()
45
19
for i = 1 , indices :size (1 ) do
@@ -129,11 +103,13 @@ function make_batch_iterators(text, split_fractions, chunk_size, batch_size)
129
103
return alphabet , batch_iterators
130
104
end
131
105
132
- local text = load_text ()
133
- local fractions = torch .Tensor {0.25 , 0.75 }
134
- local alphabet , batch_iterators = make_batch_iterators (text , fractions , 2 , 2 )
106
+ -- expose the following functions as fields of the returned batcher module table
107
+ batcher .make_batch_iterators = make_batch_iterators
108
+ batcher .make_batch_iterator = make_batch_iterator
109
+ batcher .stack = stack
110
+ batcher .make_chunk_iterators = make_chunk_iterators
111
+ batcher .split_indices = split_indices
112
+ batcher .load_text = load_text
113
+ batcher .make_chunk_iterator = make_chunk_iterator
135
114
136
- for i , batch_iterator in pairs (batch_iterators ) do
137
- print (" > Batch Iterator: " .. i )
138
- print (batch_iterator ())
139
- end
115
+ return batcher
0 commit comments