Skip to content

Commit eb18cf3

Browse files
authored
feature/enhance image storage (#120)
1 parent 41b4f4b commit eb18cf3

File tree

7 files changed

+197
-82
lines changed

7 files changed

+197
-82
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ add_executable(vl_test
5151
${PROJECT_SOURCE_DIR}/visualdl/logic/sdk_test.cc
5252
${PROJECT_SOURCE_DIR}/visualdl/logic/histogram_test.cc
5353
${PROJECT_SOURCE_DIR}/visualdl/storage/storage_test.cc
54+
${PROJECT_SOURCE_DIR}/visualdl/storage/test_binary_record.cc
5455
${PROJECT_SOURCE_DIR}/visualdl/utils/test_concurrency.cc
5556
${PROJECT_SOURCE_DIR}/visualdl/utils/test_image.cc
5657
${PROJECT_SOURCE_DIR}/visualdl/utils/concurrency.h

visualdl/logic/sdk.cc

Lines changed: 90 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,83 @@
11
#include "visualdl/logic/sdk.h"
22

33
#include "visualdl/logic/histogram.h"
4+
#include "visualdl/storage/binary_record.h"
45
#include "visualdl/utils/image.h"
56
#include "visualdl/utils/macro.h"
67

78
namespace visualdl {
89

10+
// global log dir, a hack solution to pass it across all the components.
11+
// One process of VDL backend can only process a single logdir, so this
12+
// is OK.
13+
std::string g_log_dir;
14+
15+
// Returns a copy of this writer whose mode is `mode`.
//
// The copy is taken first; `mode` is then registered on this writer's own
// storage_ via AddMode before the copy's mode_ is overwritten.
LogWriter LogWriter::AsMode(const std::string& mode) {
  LogWriter derived(*this);
  storage_.AddMode(mode);
  derived.mode_ = mode;
  return derived;
}
21+
22+
// Creates a new tablet for `tag` under the writer's current mode.
//
// The storage key is "<mode_>/<tag>" passed through string::TagEncode. The
// returned tablet gets mode_ as its only caption and (mode_, tag) as its tag.
Tablet LogWriter::AddTablet(const std::string& tag) {
  // TODO(ChunweiYan) add string check here.
  std::string storage_key = mode_;
  storage_key += "/";
  storage_key += tag;
  string::TagEncode(storage_key);

  auto tablet = storage_.AddTablet(storage_key);
  tablet.SetCaptions(std::vector<std::string>({mode_}));
  tablet.SetTag(mode_, tag);
  return tablet;
}
31+
32+
// Opens the storage reader on `dir` and publishes `dir` through the global
// g_log_dir, which ImageReader::record later uses to locate binary records.
LogReader::LogReader(const std::string& dir) : reader_(dir) {
  g_log_dir = dir;
}
33+
34+
// Returns a copy of this reader with its mode replaced by `mode`; this reader
// itself is left unchanged.
LogReader LogReader::AsMode(const std::string& mode) {
  auto derived = *this;
  derived.mode_ = mode;
  return derived;
}
39+
40+
// Looks up the tablet stored for `tag` under the current mode. The lookup key
// is "<mode_>/<tag>" after string::TagEncode, mirroring LogWriter::AddTablet.
TabletReader LogReader::tablet(const std::string& tag) {
  std::string storage_key = mode_;
  storage_key += "/";
  storage_key += tag;
  string::TagEncode(storage_key);
  return reader_.tablet(storage_key);
}
45+
46+
std::vector<std::string> LogReader::all_tags() {
47+
auto tags = reader_.all_tags();
48+
auto it =
49+
std::remove_if(tags.begin(), tags.end(), [&](const std::string& tag) {
50+
return !TagMatchMode(tag, mode_);
51+
});
52+
tags.erase(it + 1);
53+
return tags;
54+
}
55+
56+
std::vector<std::string> LogReader::tags(const std::string& component) {
57+
auto type = Tablet::type(component);
58+
auto tags = reader_.tags(type);
59+
CHECK(!tags.empty()) << "component " << component << " has no taged records";
60+
std::vector<std::string> res;
61+
for (const auto& tag : tags) {
62+
if (TagMatchMode(tag, mode_)) {
63+
res.push_back(GenReadableTag(mode_, tag));
64+
}
65+
}
66+
return res;
67+
}
68+
69+
// Decodes a stored tag and strips the leading "<mode>/" prefix from it.
//
// NOTE(review): the decoded tag is assumed to be at least mode.size() + 1
// characters long; std::string::substr throws std::out_of_range otherwise.
std::string LogReader::GenReadableTag(const std::string& mode,
                                      const std::string& tag) {
  std::string decoded(tag);
  string::TagDecode(decoded);
  const auto prefix_length = mode.size() + 1;  // including `/`
  return decoded.substr(prefix_length);
}
75+
76+
// Returns true when `tag` is strictly longer than `mode` and starts with it.
//
// NOTE(review): only the raw prefix is compared, not the separator that
// follows it, so a mode that is a prefix of another mode's name would also
// match — confirm whether callers rely on that.
bool LogReader::TagMatchMode(const std::string& tag, const std::string& mode) {
  const auto mode_length = mode.size();
  return tag.size() > mode_length && tag.compare(0, mode_length, mode) == 0;
}
80+
981
namespace components {
1082

1183
template <typename T>
@@ -103,8 +175,10 @@ void Image::SetSample(int index,
103175
new_shape.emplace_back(1);
104176
}
105177
// production
106-
int size = std::accumulate(
107-
new_shape.begin(), new_shape.end(), 1., [](int a, int b) { return a * b; });
178+
int size =
179+
std::accumulate(new_shape.begin(), new_shape.end(), 1., [](int a, int b) {
180+
return a * b;
181+
});
108182
CHECK_GT(size, 0);
109183
CHECK_LE(new_shape.size(), 3)
110184
<< "shape should be something like (width, height, num_channel)";
@@ -114,30 +188,28 @@ void Image::SetSample(int index,
114188
CHECK_LT(index, num_samples_);
115189
CHECK_LE(index, num_records_);
116190

117-
auto entry = step_.MutableData<std::vector<byte_t>>(index);
118191
// trick to store int8 to protobuf
119192
std::vector<byte_t> data_str(data.size());
120193
for (int i = 0; i < data.size(); i++) {
121194
data_str[i] = data[i];
122195
}
123196
Uint8Image image(new_shape[2], new_shape[0] * new_shape[1]);
124197
NormalizeImage(&image, &data[0], new_shape[0] * new_shape[1], new_shape[2]);
125-
// entry.SetRaw(std::string(data_str.begin(), data_str.end()));
126-
entry.SetRaw(
198+
199+
BinaryRecord brcd(
200+
GenBinaryRecordDir(step_.parent()->dir()),
127201
std::string(image.data(), image.data() + image.rows() * image.cols()));
202+
brcd.tofile();
203+
204+
auto entry = step_.MutableData<std::vector<byte_t>>(index);
205+
entry.SetRaw(brcd.hash());
128206

129207
static_assert(
130208
!is_same_type<value_t, shape_t>::value,
131209
"value_t should not use int64_t field, this type is used to store shape");
132210

133211
// set meta.
134212
entry.SetMulti(new_shape);
135-
136-
// // set meta with hack
137-
// Entry<shape_t> meta;
138-
// meta.set_parent(entry.parent());
139-
// meta.entry = entry.entry;
140-
// meta.SetMulti(shape);
141213
}
142214

143215
std::string ImageReader::caption() {
@@ -154,9 +226,13 @@ ImageReader::ImageRecord ImageReader::record(int offset, int index) {
154226
ImageRecord res;
155227
auto record = reader_.record(offset);
156228
auto entry = record.data(index);
157-
auto data_str = entry.GetRaw();
158-
std::transform(data_str.begin(),
159-
data_str.end(),
229+
auto data_hash = entry.GetRaw();
230+
CHECK(!g_log_dir.empty())
231+
<< "g_log_dir should be set in LogReader construction";
232+
BinaryRecordReader brcd(GenBinaryRecordDir(g_log_dir), data_hash);
233+
234+
std::transform(brcd.data.begin(),
235+
brcd.data.end(),
160236
std::back_inserter(res.data),
161237
[](byte_t i) { return (int)(i); });
162238
res.shape = entry.GetMulti<shape_t>();

visualdl/logic/sdk.h

Lines changed: 9 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -31,25 +31,9 @@ class LogWriter {
3131
storage_.AddMode(mode);
3232
}
3333

34-
LogWriter AsMode(const std::string& mode) {
35-
LogWriter writer = *this;
36-
storage_.AddMode(mode);
37-
writer.mode_ = mode;
38-
return writer;
39-
}
34+
LogWriter AsMode(const std::string& mode);
4035

41-
/**
42-
* create a new tablet
43-
*/
44-
Tablet AddTablet(const std::string& tag) {
45-
// TODO(ChunweiYan) add string check here.
46-
auto tmp = mode_ + "/" + tag;
47-
string::TagEncode(tmp);
48-
auto res = storage_.AddTablet(tmp);
49-
res.SetCaptions(std::vector<std::string>({mode_}));
50-
res.SetTag(mode_, tag);
51-
return res;
52-
}
36+
Tablet AddTablet(const std::string& tag);
5337

5438
Storage& storage() { return storage_; }
5539

@@ -64,61 +48,26 @@ class LogWriter {
6448
*/
6549
class LogReader {
6650
public:
67-
LogReader(const std::string& dir) : reader_(dir) {}
51+
LogReader(const std::string& dir);
6852

6953
void SetMode(const std::string& mode) { mode_ = mode; }
7054

71-
LogReader AsMode(const std::string& mode) {
72-
auto tmp = *this;
73-
tmp.mode_ = mode;
74-
return tmp;
75-
}
55+
LogReader AsMode(const std::string& mode);
7656

7757
const std::string& mode() { return mode_; }
7858

79-
TabletReader tablet(const std::string& tag) {
80-
auto tmp = mode_ + "/" + tag;
81-
string::TagEncode(tmp);
82-
return reader_.tablet(tmp);
83-
}
59+
TabletReader tablet(const std::string& tag);
8460

85-
std::vector<std::string> all_tags() {
86-
auto tags = reader_.all_tags();
87-
auto it =
88-
std::remove_if(tags.begin(), tags.end(), [&](const std::string& tag) {
89-
return !TagMatchMode(tag, mode_);
90-
});
91-
tags.erase(it + 1);
92-
return tags;
93-
}
61+
std::vector<std::string> all_tags();
9462

95-
std::vector<std::string> tags(const std::string& component) {
96-
auto type = Tablet::type(component);
97-
auto tags = reader_.tags(type);
98-
CHECK(!tags.empty()) << "component " << component
99-
<< " has no taged records";
100-
std::vector<std::string> res;
101-
for (const auto& tag : tags) {
102-
if (TagMatchMode(tag, mode_)) {
103-
res.push_back(GenReadableTag(mode_, tag));
104-
}
105-
}
106-
return res;
107-
}
63+
std::vector<std::string> tags(const std::string& component);
10864

10965
StorageReader& storage() { return reader_; }
11066

11167
static std::string GenReadableTag(const std::string& mode,
112-
const std::string& tag) {
113-
auto tmp = tag;
114-
string::TagDecode(tmp);
115-
return tmp.substr(mode.size() + 1); // including `/`
116-
}
68+
const std::string& tag);
11769

118-
static bool TagMatchMode(const std::string& tag, const std::string& mode) {
119-
if (tag.size() <= mode.size()) return false;
120-
return tag.substr(0, mode.size()) == mode;
121-
}
70+
static bool TagMatchMode(const std::string& tag, const std::string& mode);
12271

12372
protected:
12473
private:

visualdl/server/lib.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -137,11 +137,6 @@ def get_invididual_image(storage, mode, tag, step_index, max_size=80):
137137
data = np.array(record.data(), dtype='uint8').reshape(shape)
138138
tempfile = NamedTemporaryFile(mode='w+b', suffix='.png')
139139
with Image.fromarray(data) as im:
140-
size = max(shape[0], shape[1])
141-
if size > max_size:
142-
scale = max_size * 1. / size
143-
scaled_shape = (int(shape[0] * scale), int(shape[1] * scale))
144-
im = im.resize(scaled_shape)
145140
im.save(tempfile)
146141
tempfile.seek(0, 0)
147142
return tempfile

visualdl/storage/binary_record.h

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#ifndef VISUALDL_STORAGE_BINARY_RECORD_H
2+
#define VISUALDL_STORAGE_BINARY_RECORD_H
3+
4+
#include <fstream>
#include <functional>
#include <string>
#include <utility>

#include "visualdl/utils/filesystem.h"
8+
9+
namespace visualdl {
10+
11+
// Returns the directory, under log directory `dir`, where binary records are
// stored: "<dir>/binary_records".
//
// Declared `inline` rather than `static`: this is a header, and `static`
// would give every including translation unit its own private copy (and an
// unused-function warning wherever it is not called).
inline std::string GenBinaryRecordDir(const std::string& dir) {
  return dir + "/binary_records";
}
14+
15+
// A storage helper to save large files (currently just for the Image component).
16+
// The protobuf message has some limitation on message size, and LogWriter
17+
// will maintain a memory of all the messages, it is bad to store images
18+
// directly in protobuf. So a simple binary storage is used to serialize images
19+
// to disk.
20+
struct BinaryRecord {
21+
std::hash<std::string> hasher;
22+
23+
BinaryRecord(const std::string dir, std::string&& data)
24+
: data_(data), dir_(dir) {
25+
hash_ = std::to_string(hasher(data));
26+
path_ = dir + "/" + hash();
27+
}
28+
29+
const std::string& path() { return path_; }
30+
31+
void tofile() {
32+
fs::TryRecurMkdir(dir_);
33+
std::fstream file(path_, file.binary | file.out);
34+
CHECK(file.is_open()) << "open " << path_ << " failed";
35+
36+
size_t size = data_.size();
37+
file.write(reinterpret_cast<char*>(&size), sizeof(size));
38+
file.write(data_.data(), data_.size());
39+
}
40+
41+
const std::string& hash() { return hash_; }
42+
43+
private:
44+
std::string dir_;
45+
std::string path_;
46+
std::string data_;
47+
std::string hash_;
48+
};
49+
50+
struct BinaryRecordReader {
51+
std::string data;
52+
std::hash<std::string> hasher;
53+
54+
BinaryRecordReader(const std::string& dir, const std::string& hash)
55+
: dir_(dir), hash_(hash) {
56+
fromfile();
57+
}
58+
std::string hash() { return std::to_string(hasher(data)); }
59+
60+
protected:
61+
void fromfile() {
62+
std::string path = dir_ + "/" + hash_;
63+
std::ifstream file(path, file.binary);
64+
CHECK(file.is_open()) << " failed to open file " << path;
65+
66+
size_t size;
67+
file.read(reinterpret_cast<char*>(&size), sizeof(size_t));
68+
data.resize(size);
69+
file.read(&data[0], size);
70+
71+
CHECK_EQ(hash(), hash_) << "data broken: " << path;
72+
}
73+
74+
private:
75+
std::string dir_;
76+
std::string hash_;
77+
};
78+
79+
} // namespace visualdl
80+
#endif
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#include "visualdl/storage/binary_record.h"
2+
3+
#include <gtest/gtest.h>
4+
5+
using namespace visualdl;
6+
7+
// Round trip: write a record into the current directory, read it back by its
// hash, and expect the original payload.
TEST(BinaryRecord, init) {
  std::string payload = "hello world";
  BinaryRecord record("./", std::move(payload));
  record.tofile();

  BinaryRecordReader loaded("./", record.hash());
  LOG(INFO) << loaded.data;
  ASSERT_EQ(loaded.data, "hello world");
}

visualdl/utils/filesystem.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
#include <unistd.h>
88
#include <fstream>
99

10+
#include "visualdl/utils/logging.h"
11+
1012
namespace visualdl {
1113

1214
namespace fs {
@@ -44,7 +46,6 @@ bool DeSerializeFromFile(T* proto, const std::string& path) {
4446
}
4547

4648
static void TryMkdir(const std::string& dir) {
47-
// VLOG(1) << "try to mkdir " << dir;
4849
struct stat st = {0};
4950
if (stat(dir.c_str(), &st) == -1) {
5051
::mkdir(dir.c_str(), 0700);
@@ -67,7 +68,6 @@ static void TryRecurMkdir(const std::string& path) {
6768
inline void Write(const std::string& path,
6869
const std::string& buffer,
6970
std::ios::openmode open_mode = std::ios::binary) {
70-
VLOG(1) << "write to path " << path;
7171
std::ofstream file(path, open_mode);
7272
CHECK(file.is_open()) << "failed to open " << path;
7373
file.write(buffer.c_str(), buffer.size());
@@ -76,7 +76,6 @@ inline void Write(const std::string& path,
7676

7777
inline std::string Read(const std::string& path,
7878
std::ios::openmode open_mode = std::ios::binary) {
79-
VLOG(1) << "read from path " << path;
8079
std::string buffer;
8180
std::ifstream file(path, open_mode | std::ios::ate);
8281
CHECK(file.is_open()) << "failed to open " << path;

0 commit comments

Comments
 (0)