
Commit db7b117

change lod tensor to absolute offsets (#4952)
1 parent: 1680903

File tree: 3 files changed (+73, -52 lines)


paddle/framework/lod_tensor.cc

Lines changed: 39 additions & 30 deletions

@@ -25,31 +25,50 @@ LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end) {
   for (size_t i = level_begin; i < level_end; i++) {
     new_lod.emplace_back(in.at(i));
   }
+  // transform the lowest level to absolute offset.
+  LoD abs_offset_lod = ToAbsOffset(in);
+  new_lod.back() = abs_offset_lod[level_end - 1];
   return new_lod;
 }
 
 LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin,
                  size_t elem_end) {
-  // slice the lod.
-  LoD new_lod;
-  new_lod.reserve(in.size() - level);
-  auto start = in.at(level)[elem_begin];
-  auto end = in.at(level)[elem_end];
-
-  for (auto it = in.begin() + level; it != in.end(); it++) {
-    auto it_begin = std::find(it->begin(), it->end(), start);
-    auto it_end = std::find(it_begin, it->end(), end);
-    PADDLE_ENFORCE(it_begin != it->end(), "error in parsing lod info");
-    PADDLE_ENFORCE(it_end != it->end(), "error in parsing lod info");
-    new_lod.emplace_back(it_begin, it_end + 1);
-    // reset offset if tensor is copyed and sliced.
-    std::transform(new_lod.back().begin(), new_lod.back().end(),
-                   new_lod.back().begin(),
-                   [start](int v) { return v - start; });
-    PADDLE_ENFORCE_EQ(new_lod.back().front(), 0, "error in slice LoD");
+  PADDLE_ENFORCE_LT(level, in.size());
+  PADDLE_ENFORCE_LT(elem_end, in[level].size());
+
+  LoD res;
+  res.resize(in.size() - level);
+  // copy the first level
+  res[0].assign(in[level].begin() + elem_begin,
+                in[level].begin() + elem_end + 1);
+  for (size_t lvl = 1; lvl < res.size(); lvl++) {
+    const auto& in_level = in[level + lvl];
+    const auto& above_level = res[lvl - 1];
+    auto& out_level = res[lvl];
+    out_level.assign(in_level.begin() + above_level.front(),
+                     in_level.begin() + above_level.back() + 1);
   }
-  PADDLE_ENFORCE_LE(new_lod.size(), in.size());
-  return new_lod;
+  for (size_t lvl = 0; lvl < res.size(); lvl++) {
+    // to make the first offset equals 0, all the elements minus the first
+    // element
+    size_t front = res[lvl].front();
+    for (auto& ele : res[lvl]) {
+      ele -= front;
+    }
+  }
+  return res;
+}
+
+LoD ToAbsOffset(const LoD& in) {
+  // the lowest level stores relative offsets
+  if (in.empty() || in.size() == 1) return in;
+  LoD result = in;
+  for (int level = result.size() - 2; level >= 0; level--) {
+    for (auto& ele : result[level]) {
+      ele = result[level + 1][ele];
+    }
+  }
+  return result;
 }
 
 bool operator==(const LoD& a, const LoD& b) {
@@ -75,17 +94,7 @@ bool operator==(const LoD& a, const LoD& b) {
 size_t LoDTensor::NumElements(size_t level, size_t idx) const {
   PADDLE_ENFORCE_LT(level, NumLevels());
   PADDLE_ENFORCE_LT(idx, NumElements(level));
-  // the last level of LoD, just return number of records in Tensor
-  if (level == NumLevels() - 1) {
-    return lod_[level][idx + 1] - lod_[level][idx];
-  }
-  // high level of LoD, and there is another lower level, return number of
-  // lower-level elements
-  auto tmp = SliceInLevel(lod_, level, idx, idx + 1);
-  PADDLE_ENFORCE_GE(tmp.size(), 2);
-  // there is a 0 as a placeholder stored in LoD, so the number of elements
-  // equals lod.size() - 1
-  return tmp[1].size() - 1;
+  return lod_[level][idx + 1] - lod_[level][idx];
 }
 
 void LoDTensor::ShrinkLevels(size_t level_begin, size_t level_end) {
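To make the new offset convention concrete, here is a minimal standalone sketch of the conversion that ToAbsOffset performs, applied to the 3-level relative-offset LoD used in the updated test fixture. It uses plain std::vector in place of the framework's Vector alias and a hypothetical name (ToAbsOffsetSketch), so it is an illustration of the technique rather than the framework's own code.

#include <cassert>
#include <cstddef>
#include <vector>

// A LoD stored as nested vectors; the lowest level holds tensor-row offsets.
using Lod = std::vector<std::vector<std::size_t>>;

// Mirror of ToAbsOffset: every level above the lowest is rewritten so that its
// entries index rows of the underlying tensor instead of the level below.
Lod ToAbsOffsetSketch(const Lod& in) {
  if (in.size() <= 1) return in;
  Lod result = in;
  for (int level = static_cast<int>(result.size()) - 2; level >= 0; level--) {
    for (auto& ele : result[level]) {
      ele = result[level + 1][ele];  // the level below is already absolute
    }
  }
  return result;
}

int main() {
  // Relative-offset LoD from the updated test fixture.
  Lod relative = {{0, 2, 3},
                  {0, 2, 5, 8},
                  {0, 2, 5, 7, 10, 12, 15, 17, 20}};
  Lod absolute = ToAbsOffsetSketch(relative);
  // Level 1 {0, 2, 5, 8} becomes {0, 5, 12, 20}: each entry e is replaced by
  // relative[2][e], e.g. 5 -> relative[2][5] == 12.
  assert((absolute[1] == std::vector<std::size_t>{0, 5, 12, 20}));
  // Level 0 {0, 2, 3} then becomes {0, 12, 20} via the already-absolute level 1.
  assert((absolute[0] == std::vector<std::size_t>{0, 12, 20}));
  return 0;
}

With this convention, SliceLevels can drop lower levels and the new lowest level still addresses the tensor directly, which is why it rewrites new_lod.back() with the absolute offsets.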

paddle/framework/lod_tensor.h

Lines changed: 19 additions & 6 deletions

@@ -39,23 +39,36 @@ using Vector = thrust::host_vector<
 #endif
 
 /*
- * 3-level LoD stores
+ * LoD is short for Level of Details.
  *
- * 0 10 20
- * 0 5 10 15 20
- * 0 2 5 7 10 12 15 20
- *
- * - in a level, each element indicates offset in the underlying Tensor
+ * - in a level, each element indicates relative offset of the lower level
  * - the first element should be 0 and that indicates that this sequence start
  * from 0
  * - each sequence's begin and end(no-inclusive) is level[id, id+1]
+ *
+ * For example:
+ *    3-level LoD stores
+ *
+ *    0 2 3
+ *    0 2 4 7
+ *    0 2 5 7 10 12 15 20
  */
 using LoD = std::vector<Vector<size_t>>;
 
+/*
+ * Slice levels from a LoD.
+ * NOTE the lowest level should always be the absolute offsets of the underlying
+ * tensor instances. So if higher layers are sliced without the lowest level,
+ * the lower level of the sliced LoD will be transformed to the absolute offset.
+ */
 LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end);
 
 LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin,
                  size_t elem_end);
+/*
+ * Transform an LoD from relative offsets to absolute offsets.
+ */
+LoD ToAbsOffset(const LoD& in);
 
 bool operator==(const LoD& a, const LoD& b);
 
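Since the header comment now documents relative offsets per level, a short sketch may help show how a top-level sequence is followed down to tensor rows. RowRange below is a hypothetical helper written only for this illustration (it is not declared in lod_tensor.h), and it assumes the lowest level stores offsets into the underlying tensor, as in the example above.

#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

using Lod = std::vector<std::vector<std::size_t>>;

// Follow sequence `idx` of `level` down through the relative offsets to the
// lowest level, returning the half-open range of tensor rows it covers.
std::pair<std::size_t, std::size_t> RowRange(const Lod& lod, std::size_t level,
                                             std::size_t idx) {
  std::size_t begin = lod[level][idx];
  std::size_t end = lod[level][idx + 1];
  for (std::size_t l = level + 1; l < lod.size(); ++l) {
    begin = lod[l][begin];
    end = lod[l][end];
  }
  return {begin, end};
}

int main() {
  // The 3-level example from the header comment.
  Lod lod = {{0, 2, 3},
             {0, 2, 4, 7},
             {0, 2, 5, 7, 10, 12, 15, 20}};
  auto r0 = RowRange(lod, 0, 0);  // level-1 entries [0, 2) -> rows [0, 10)
  auto r1 = RowRange(lod, 0, 1);  // level-1 entries [2, 3) -> rows [10, 20)
  std::cout << r0.first << "-" << r0.second << " "    // prints 0-10
            << r1.first << "-" << r1.second << "\n";  // prints 10-20
  return 0;
}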

paddle/framework/lod_tensor_test.cc

Lines changed: 15 additions & 16 deletions

@@ -30,8 +30,8 @@ class LoDTensorTester : public ::testing::Test {
     // 0 5 10 15 20
     // 0 2 5 7 10 12 15 20
     LoD lod;
-    lod.push_back(std::vector<size_t>{0, 10, 20});
-    lod.push_back(std::vector<size_t>{0, 5, 10, 15, 20});
+    lod.push_back(std::vector<size_t>{0, 2, 3});
+    lod.push_back(std::vector<size_t>{0, 2, 5, 8});
     lod.push_back(std::vector<size_t>{0, 2, 5, 7, 10, 12, 15, 17, 20});
 
     ASSERT_EQ(lod.size(), 3UL);
@@ -52,14 +52,14 @@ TEST_F(LoDTensorTester, NumLevels) { ASSERT_EQ(lod_tensor_.NumLevels(), 3UL); }
 
 TEST_F(LoDTensorTester, NumElements) {
   ASSERT_EQ(lod_tensor_.NumElements(0), 2UL);
-  ASSERT_EQ(lod_tensor_.NumElements(1), 4UL);
+  ASSERT_EQ(lod_tensor_.NumElements(1), 3UL);
   ASSERT_EQ(lod_tensor_.NumElements(2), 8UL);
 }
 
 TEST_F(LoDTensorTester, NumElements2) {
   ASSERT_EQ(lod_tensor_.NumElements(0, 0), 2UL);
-  ASSERT_EQ(lod_tensor_.NumElements(0, 1), 2UL);
-  ASSERT_EQ(lod_tensor_.NumElements(1, 1), 2UL);
+  ASSERT_EQ(lod_tensor_.NumElements(0, 1), 1UL);
+  ASSERT_EQ(lod_tensor_.NumElements(1, 1), 3UL);
 }
 
 TEST_F(LoDTensorTester, ShrinkLevels) {
@@ -68,37 +68,36 @@ TEST_F(LoDTensorTester, ShrinkLevels) {
     LoDTensor new_lod_tensor = lod_tensor_;
     new_lod_tensor.ShrinkLevels(level, level + 1);
     ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL);
-    ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor_.NumElements(level));
     ASSERT_EQ(new_lod_tensor.data<float>(), lod_tensor_.data<float>());
   }
   // shrink 2 level
   for (size_t level = 0; level < 2UL; ++level) {
     LoDTensor new_lod_tensor = lod_tensor_;
     new_lod_tensor.ShrinkLevels(level, level + 2);
+    // the lowest level's last element should be the tensor's batch_size.
+    ASSERT_EQ(new_lod_tensor.lod().back().back(),
+              lod_tensor_.lod().back().back());
     ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL);
-    ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor_.NumElements(level));
-    ASSERT_EQ(new_lod_tensor.NumElements(1),
-              lod_tensor_.NumElements(level + 1));
     ASSERT_EQ(new_lod_tensor.data<float>(), lod_tensor_.data<float>());
   }
 }
 
 TEST_F(LoDTensorTester, ShrinkInLevel) {
   size_t level = 0;
   LoDTensor new_lod_tensor = lod_tensor_;
-  new_lod_tensor.ShrinkInLevel(level, 0, 2);
+  new_lod_tensor.ShrinkInLevel(level, 0, 1);
   EXPECT_EQ(new_lod_tensor.NumLevels(), 3UL);
-  EXPECT_EQ(new_lod_tensor.NumElements(0), 2UL);
-  EXPECT_EQ(new_lod_tensor.NumElements(1), 4UL);
-  EXPECT_EQ(new_lod_tensor.NumElements(2), 8UL);
+  EXPECT_EQ(new_lod_tensor.NumElements(0), 1UL);
+  EXPECT_EQ(new_lod_tensor.NumElements(1), 2UL);
+  EXPECT_EQ(new_lod_tensor.NumElements(2), 5UL);
   ASSERT_EQ(new_lod_tensor.data<float>(), lod_tensor_.data<float>());
 
   level = 1;
   new_lod_tensor = lod_tensor_;
-  new_lod_tensor.ShrinkInLevel(level, 0, 2);
+  new_lod_tensor.ShrinkInLevel(level, 1, 2);
   ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL);
-  ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL);
-  ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL);
+  ASSERT_EQ(new_lod_tensor.NumElements(0), 1UL);
+  ASSERT_EQ(new_lod_tensor.NumElements(1), 3UL);
   ASSERT_EQ(new_lod_tensor.data<float>(), lod_tensor_.data<float>());
 }
 
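The updated expectations follow directly from the simplified NumElements: with relative offsets in every level, the element count at (level, idx) is just the difference of two adjacent entries. Below is a small sketch of that arithmetic against the new fixture, again using plain std::vector and a hypothetical helper name as an illustration rather than the framework's API.

#include <cassert>
#include <cstddef>
#include <vector>

using Lod = std::vector<std::vector<std::size_t>>;

// Same arithmetic as the simplified LoDTensor::NumElements(level, idx).
std::size_t NumElementsSketch(const Lod& lod, std::size_t level,
                              std::size_t idx) {
  return lod[level][idx + 1] - lod[level][idx];
}

int main() {
  // Fixture from the test above.
  Lod lod = {{0, 2, 3},
             {0, 2, 5, 8},
             {0, 2, 5, 7, 10, 12, 15, 17, 20}};
  assert(NumElementsSketch(lod, 0, 1) == 1);  // lod[0][2] - lod[0][1] = 3 - 2
  assert(NumElementsSketch(lod, 1, 1) == 3);  // lod[1][2] - lod[1][1] = 5 - 2
  return 0;
}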
