diff --git a/CMakeLists.txt b/CMakeLists.txt
index b20fb44..62d49a4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,7 +10,7 @@ foreach(target ctc normalizer)
     set_target_properties(${target} PROPERTIES
         PREFIX ""
         SUFFIX ".so"
-        LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/../${target}"
+        LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/../"
     )
     target_link_libraries(${target} TH luaT luajit)
 endforeach(target)
diff --git a/ctc.lua b/ctc.lua
index f159745..14434b3 100644
--- a/ctc.lua
+++ b/ctc.lua
@@ -150,7 +150,6 @@ function ctc.getCTCCost(outputTable, target)
     fvs = ctc.getForwardVariable(outputTable, alignedTable, target)
 
-    -- calculate backwardVariable
     bvs = ctc.getBackwardVariable(outputTable, alignedTable, target)
diff --git a/loader.lua b/loader.lua
index a70c699..e5cb0b4 100644
--- a/loader.lua
+++ b/loader.lua
@@ -1,4 +1,6 @@
+require 'image'
 require 'codec'
+require 'normalizer'
 utf8 = require 'utf8'
 
 Loader = {
@@ -27,11 +29,34 @@ function Loader:new(o)
     return o
 end
 
+function Loader.__getNormalizedImage(src)
+    local im = image.load(src, 1)
+
+    if im:dim() == 3 then
+        im = im[1]
+    end
+
+    output = torch.Tensor()
+
+    w = im:size()[2]
+    h = im:size()[1]
+
+    ones = torch.ones(h, w)
+
+    im = ones - im
+
+    normalizer.normalize(im, output)
+
+    return output
+end
+
 function Loader:load(file)
     self.samples = {}
     local f = assert(io.open(file, "r"))
     for line in f:lines() do
         local src = line
+        local im = Loader.__getNormalizedImage(src)
+
         local gt = src:gsub(".png", ".gt.txt")
         local cf = assert(io.open(gt, "r"))
         local gt = cf:read("*line")
@@ -45,7 +70,7 @@
 
         end
 
-        table.insert(self.samples, {src = src, gt = gt})
+        table.insert(self.samples, {src = src, gt = gt, img = im})
     end
 
     f:close()
@@ -56,7 +81,7 @@
     self.codec_obj = nil
     self.weights = nil
 
-    return self.samples
+    -- return self.samples
 end
 
 function Loader:pick()
@@ -74,8 +99,8 @@ function Loader:pickWithWeight()
         self.p = torch.zeros(#self.samples)
         local i = 0
-        self.p:apply(function() 
-            i = i + 1 
+        self.p:apply(function()
+            i = i + 1
             return torch.normal(1.0 / self.weights[i], 1.0 / self.weights[i] / 3.0)
         end)
     end
diff --git a/main.lua b/main.lua
index 9a4908c..973257b 100644
--- a/main.lua
+++ b/main.lua
@@ -3,6 +3,7 @@
 require 'rnn'
 require 'image'
 require 'optim'
 
+require 'loader'
 require 'ctc_log'
 require 'utils/decoder'
@@ -10,7 +11,7 @@
 mnist = require 'mnist'
 
 DROPOUT_RATE = 0.4
 
-local input_size = 28
+local input_size = 64
 local hidden_size = 100
 local class_num = 10
@@ -36,12 +37,27 @@ state = {
     momentum = 0.5
 }
 
+loader = Loader()
+loader:load("1.txt")
+codec = loader:codec()
+
+local sample = loader:pick()
+local im = sample.img
+local target = codec:encode(sample.gt)
+
+raw = image.load(sample.src, 1)
+
+print(raw[1])
+
+print(im)
+
+--[[
 for i = 1, 100000 do
-    local no = torch.random() % 100 + 1
-    local sample = mnist.traindataset()[no]
-    local im = sample.x:double():t()
-    local target = torch.Tensor{sample.y + 1}
-
+    local sample = loader:pick()
+    local im = sample.img
+    local target = codec:encode(sample.gt)
+
+    print(im)
 
     local feval = function(params)
         net:forget()
@@ -51,8 +67,8 @@ for i = 1, 100000 do
         loss, grad = ctc.getCTCCostAndGrad(outputTable, target)
 
         if i % 20 == 0 then
-            print(target[1] - 1)
-            print(decoder.decodeTable(outputTable))
+            print(sample.gt)
+            print(decoder.best_path_decode(outputTable))
             print(loss)
         end
@@ -67,3 +83,6 @@ for i = 1, 100000 do
 
     optim.sgd(feval, params, state)
 end
+
+
+]]
diff --git a/normalizer.cc b/normalizer.cc
index 4ee0fdd..9ec6553 100644
--- a/normalizer.cc
+++ b/normalizer.cc
@@ -114,14 +114,14 @@ static double bilinear(double * in, int w, int h, double x, double y) {
     xt = xt > w - 1 ? w - 1 : xt;
     yt = yt > h - 1 ? h - 1 : yt;
 
-    // printf("(%d, %d)\n", xi, yi);
+    printf("(%d, %d)\n", xi, yi);
 
     double p00 = in[yi * w + xi];
     double p01 = in[yt * w + xi];
     double p10 = in[yi * w + xt];
     double p11 = in[yt * w + xt];
 
-    return p00 * (1.0 - xf) * (1.0 - yf) + p10 * xf * (1.0 - yf) + p01 * (1.0 - xf) * yf + p11 * xf * yf;
+    p00 * (1.0 - xf) * (1.0 - yf) + p10 * xf * (1.0 - yf) + p01 * (1.0 - xf) * yf + p11 * xf * yf;
 }
@@ -174,7 +174,7 @@ static void normalize
 
     THDoubleTensor_resize2d(out, target_height, target_width);
 
-    printf("scale = %.4f\n", scale);
+    // printf("scale = %.4f\n", scale);
 
     double * outData = THDoubleTensor_data(out);
diff --git a/normalizer.lua b/test_normalizer.lua
similarity index 89%
rename from normalizer.lua
rename to test_normalizer.lua
index d60dd38..f4d3d0b 100644
--- a/normalizer.lua
+++ b/test_normalizer.lua
@@ -1,5 +1,5 @@
 require 'image'
-require 'normalizer/normalizer'
+require 'normalizer'
 
 im = image.load("bq01_006.png", 1)
diff --git a/utils/decoder.lua b/utils/decoder.lua
index f229274..c66de16 100644
--- a/utils/decoder.lua
+++ b/utils/decoder.lua
@@ -12,7 +12,7 @@ function decoder.best_path_decode(outputTable, codec)
         local max_val, max = torch.max(outputTable[i], 1)
         max = max[1]
 
-        if max = class_num then
+        if max == class_num then
             if last_max ~= -1 and last_max_class ~= nil then
                 table.insert(result, last_max_class)
                 last_max = -1
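
For reference, the best-path (greedy) CTC decoding that utils/decoder.lua performs takes the arg-max label per frame, collapses consecutive repeats, and drops the blank label. Below is a minimal standalone sketch of that idea; the helper name best_path_decode_sketch is hypothetical, and it assumes outputTable is a table of 1-D per-frame probability tensors with the blank label indexed by class_num (as in the hunk above). It illustrates the technique only and is not the project's decoder.

require 'torch'

-- Best-path (greedy) CTC decoding sketch (hypothetical helper, not part of this patch).
-- Assumes outputTable is a table of 1-D probability tensors (one per frame)
-- and that the blank label is the last class index, class_num.
local function best_path_decode_sketch(outputTable, class_num)
    local result = {}
    local last_label = -1
    for i = 1, #outputTable do
        local _, max = torch.max(outputTable[i], 1)   -- arg-max label for this frame
        local label = max[1]
        -- emit only when the label is not blank and not a repeat of the previous frame
        if label ~= class_num and label ~= last_label then
            table.insert(result, label)
        end
        last_label = label
    end
    return result
end

-- usage (assumed names): labels = best_path_decode_sketch(outputTable, class_num)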