-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtolmdb.py
117 lines (89 loc) · 3.64 KB
/
tolmdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import os
import numpy as np
from scipy import io
import lmdb
import caffe
from PIL import Image
# Width, in digits, of the zero-padded decimal keys written to the LMDB files.
NUM_IDX_DIGITS = 10
# str.format template that renders an integer index as a fixed-width,
# zero-padded key; with the width above it evaluates to '{:0>10d}'.
IDX_FMT = '{:0>' + str(NUM_IDX_DIGITS) + 'd}'
def scalars_to_lmdb(scalars, path_dst,
                    lut=None):
    '''
    Generate LMDB file from list of scalars

    Each scalar is cast to int, stored as a (1, 1, 1) Caffe datum and
    written under a zero-padded key derived from its position (IDX_FMT).

    scalars:  a single scalar or an iterable of scalars; each element may
              itself be any array-like holding exactly one value.
    path_dst: path of the LMDB environment to create/open.
    lut:      optional callable applied to the (1, 1, 1) integer array of
              each entry before it is serialized.

    Returns 0 on success.
    Raises AttributeError if an element holds more than one value.
    '''
    # map_size is the maximum size the database may grow to (about 1 TB).
    db = lmdb.open(path_dst, map_size=int(1e12))
    try:
        with db.begin(write=True) as in_txn:
            # allow a bare scalar to be passed instead of a collection
            if not hasattr(scalars, '__iter__'):
                scalars = np.array([scalars])
            for idx, x in enumerate(scalars):
                print('scalar:::' + str(idx))
                content_field = np.array(x)
                # validate these are scalars
                if content_field.size != 1:
                    raise AttributeError("Unexpected shape for scalar at i=%d (%s)"
                                         % (idx, str(content_field.shape)))
                # guarantee shape (1,1,1), whatever the input rank was;
                # array_to_datum expects a 3-D array
                content_field = content_field.reshape((1, 1, 1))
                content_field = content_field.astype(int)
                if lut is not None:
                    content_field = lut(content_field)
                dat = caffe.io.array_to_datum(content_field)
                # encode the key so it is a byte string on Python 3 as well
                in_txn.put(IDX_FMT.format(idx).encode('ascii'),
                           dat.SerializeToString())
    finally:
        # release the environment even when an entry fails to convert
        db.close()
    return 0
def arrays_to_lmdb(arrs, path_dst):
    '''
    Generate LMDB file from list of ndarrays

    Each array is padded with leading singleton axes until it has at least
    three dimensions, converted to a Caffe datum and written under a
    zero-padded key derived from its position (IDX_FMT).

    arrs:     iterable of numpy ndarrays.
    path_dst: path of the LMDB environment to create/open.

    Returns 0 on success.
    '''
    # map_size is the maximum size the database may grow to (about 1 TB).
    db = lmdb.open(path_dst, map_size=int(1e12))
    try:
        with db.begin(write=True) as in_txn:
            for idx, x in enumerate(arrs):
                print('array:::' + str(idx))
                content_field = x
                # prepend axes until the array is at least 3-D
                while len(content_field.shape) < 3:
                    content_field = np.expand_dims(content_field, axis=0)
                dat = caffe.io.array_to_datum(content_field)
                # encode the key so it is a byte string on Python 3 as well
                in_txn.put(IDX_FMT.format(idx).encode('ascii'),
                           dat.SerializeToString())
    finally:
        # release the environment even when an entry fails to convert
        db.close()
    return 0
def img_to_lmdb(paths_src, path_dst):
    '''
    Generate LMDB file from a list of image files

    Each image is loaded with PIL, converted to a Channel x Height x Width
    array with the channel order reversed (RGB -> BGR), wrapped in a Caffe
    datum and written under a zero-padded key derived from its position
    (IDX_FMT).

    paths_src: iterable of image file paths.
    path_dst:  path of the LMDB environment to create/open.

    Images that load as a 2-D array (single channel) are stored as
    1 x Height x Width. Returns None.
    '''
    # map_size is the maximum size the database may grow to (about 1 TB).
    in_db = lmdb.open(path_dst, map_size=int(1e12))
    try:
        with in_db.begin(write=True) as in_txn:
            for in_idx, in_ in enumerate(paths_src):
                print('img:::' + str(in_))
                # load image as an ndarray (H x W x C, or H x W if it has
                # a single channel)
                im = np.array(Image.open(in_))
                if im.ndim == 2:
                    # single channel: nothing to reorder, just add the
                    # leading channel axis
                    im = im[np.newaxis, :, :]
                else:
                    # reverse the channel axis (RGB -> BGR), then switch
                    # from H x W x C to C x H x W
                    # NOTE(review): a 4-channel image (e.g. RGBA) comes out
                    # with all four channels reversed - confirm intent
                    im = im[:, :, ::-1]
                    im = im.transpose((2, 0, 1))
                im_dat = caffe.io.array_to_datum(im)
                # same key scheme as the other writers in this module;
                # encoded so it is a byte string on Python 3 as well
                in_txn.put(IDX_FMT.format(in_idx).encode('ascii'),
                           im_dat.SerializeToString())
    finally:
        # release the environment even when an image fails to load
        in_db.close()
def matfiles_to_lmdb(paths_src, path_dst, fieldname,
                     lut=None):
    '''
    Generate LMDB file from set of mat files with integer data
    Source: https://github.com/BVLC/caffe/issues/1698#issuecomment-70211045
    credit: Evan Shelhamer

    For every .mat file the variable `fieldname` is read, padded with
    leading singleton axes until it has at least three dimensions, cast to
    int, optionally remapped and written as a Caffe datum under a
    zero-padded key derived from its position (IDX_FMT).

    paths_src: iterable of .mat file paths.
    path_dst:  path of the LMDB environment to create/open.
    fieldname: name of the variable to read from each .mat file.
    lut:       optional callable applied to the integer array of each
               entry before it is serialized.

    Returns 0 on success.
    '''
    # map_size is the maximum size the database may grow to (about 1 TB).
    db = lmdb.open(path_dst, map_size=int(1e12))
    try:
        with db.begin(write=True) as in_txn:
            for idx, path_ in enumerate(paths_src):
                print('label:::' + str(idx))
                content_field = io.loadmat(path_)[fieldname]
                # get shape (1,H,W)
                while len(content_field.shape) < 3:
                    content_field = np.expand_dims(content_field, axis=0)
                content_field = content_field.astype(int)
                if lut is not None:
                    content_field = lut(content_field)
                img_dat = caffe.io.array_to_datum(content_field)
                # encode the key so it is a byte string on Python 3 as well
                in_txn.put(IDX_FMT.format(idx).encode('ascii'),
                           img_dat.SerializeToString())
    finally:
        # release the environment even when a file fails to load
        db.close()
    return 0