
Commit 2563d57
split functionality into separate modules
1 parent e876f89

File tree: 3 files changed, +66 −39 lines

batcher.lua

Lines changed: 15 additions & 39 deletions
@@ -1,6 +1,9 @@
-local table = require 'table'
-local math = require 'math'
-local torch = require 'torch'
+require 'table'
+require 'math'
+require 'torch'
+require 'encoding'
+
+local batcher = {}
 
 -- function to read the text file
 function load_text()
@@ -11,35 +14,6 @@ function load_text()
   return text
 end
 
--- function to create a character-based vocabulary with a unique id for every
--- character and return the vocabulary along with the dataset encoded with
--- those ids.
-function char_to_ints(text)
-  local alphabet = {}
-  local encoded = torch.Tensor(#text)
-
-  for i = 1, #text do
-    local c = text:sub(i, i)
-    if alphabet[c] == nil then
-      alphabet[#alphabet + 1] = c
-      alphabet[c] = #alphabet
-    end
-    encoded[i] = alphabet[c]
-  end
-
-  return alphabet, encoded
-end
-
--- function for one-hot encoding
-function ints_to_one_hot(ints, width)
-  local height = ints:size()[1]
-  local zeros = torch.zeros(height, width)
-  local indices = ints:view(-1, 1):long()
-  local one_hot = zeros:scatter(2, indices, 1)
-
-  return one_hot
-end
-
 function make_chunk_iterator(encoded_text, indices, chunk_size, n_symbols)
   function co()
     for i=1, indices:size(1) do
@@ -129,11 +103,13 @@ function make_batch_iterators(text, split_fractions, chunk_size, batch_size)
   return alphabet, batch_iterators
 end
 
-local text = load_text()
-local fractions = torch.Tensor{0.25, 0.75}
-local alphabet, batch_iterators = make_batch_iterators(text, fractions, 2, 2)
+-- expose the public functions through the module table
+batcher.make_batch_iterators = make_batch_iterators
+batcher.make_batch_iterator = make_batch_iterator
+batcher.stack = stack
+batcher.make_chunk_iterators = make_chunk_iterators
+batcher.split_indices = split_indices
+batcher.load_text = load_text
+batcher.make_chunk_iterator = make_chunk_iterator
 
-for i, batch_iterator in pairs(batch_iterators) do
-  print("> Batch Iterator: " .. i)
-  print(batch_iterator())
-end
+return batcher
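
Note on the pattern: batcher.lua now follows the standard Lua module idiom, collecting its public functions in a local table that the file returns, so callers can capture it with require. A minimal sketch of that idiom, using hypothetical names (mymodule, greet) that are not part of this repository:

-- mymodule.lua: minimal sketch of the module idiom used above
local mymodule = {}

-- defined without 'local', so it is also a global (as in batcher.lua)
function greet(name)
  return 'hello, ' .. name
end

mymodule.greet = greet   -- expose it through the module table
return mymodule

Callers can then write local mymodule = require 'mymodule' and call mymodule.greet('torch'). Because greet is not declared local, a bare greet('torch') also works after the require, which is what main.lua below relies on.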

encoding.lua

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
+require 'table'
+require 'torch'
+
+local encoding = {}
+
+-- function to create a character-based vocabulary with a unique id for every
+-- character and return the vocabulary along with the dataset encoded with
+-- those ids.
+function char_to_ints(text)
+  local alphabet = {}
+  local encoded = torch.Tensor(#text)
+
+  for i = 1, #text do
+    local c = text:sub(i, i)
+    if alphabet[c] == nil then
+      alphabet[#alphabet + 1] = c
+      alphabet[c] = #alphabet
+    end
+    encoded[i] = alphabet[c]
+  end
+
+  return alphabet, encoded
+end
+
+-- function for one-hot encoding
+function ints_to_one_hot(ints, width)
+  local height = ints:size()[1]
+  local zeros = torch.zeros(height, width)
+  local indices = ints:view(-1, 1):long()
+  local one_hot = zeros:scatter(2, indices, 1)
+
+  return one_hot
+end
+
+-- expose the public functions through the module table
+encoding.ints_to_one_hot = ints_to_one_hot
+encoding.char_to_ints = char_to_ints
+
+return encoding
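
For reference, a small usage sketch of the two functions, assuming a Torch/Lua environment with encoding.lua on the package path (the input string 'abab' is an illustrative choice, not from the repository):

local encoding = require 'encoding'

-- 'abab' has two unique characters, so the vocabulary has size 2
local alphabet, ints = encoding.char_to_ints('abab')
print(#alphabet)   --> 2
print(ints)        --> 1 2 1 2 (each character replaced by its id)

-- one-hot encode with width equal to the vocabulary size;
-- row i contains a single 1 in column ints[i]
local one_hot = encoding.ints_to_one_hot(ints, #alphabet)
print(one_hot)     --> 4x2 tensor with rows (1,0), (0,1), (1,0), (0,1)

The scatter call writes a 1 at position (i, ints[i]) of the zero matrix, producing one one-hot row per input character.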

main.lua

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+require 'torch'
+require 'batcher'
+require 'encoding'
+
+local text = load_text()
+local fractions = torch.Tensor{0.25, 0.75}
+local alphabet, batch_iterators = make_batch_iterators(text, fractions, 2, 2)
+
+for i, batch_iterator in pairs(batch_iterators) do
+  print("> Batch Iterator: " .. i)
+  print(batch_iterator())
+end
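
Design note: main.lua calls load_text and make_batch_iterators as bare globals, which works because batcher.lua defines its functions without local, so require 'batcher' leaves them in the global environment. A sketch of the alternative style that uses only the returned module table (not what this commit does, just the more self-contained variant):

require 'torch'
local batcher = require 'batcher'

local text = batcher.load_text()
local fractions = torch.Tensor{0.25, 0.75}
local alphabet, batch_iterators = batcher.make_batch_iterators(text, fractions, 2, 2)

for i, batch_iterator in pairs(batch_iterators) do
  print("> Batch Iterator: " .. i)
  print(batch_iterator())
end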
