-
Notifications
You must be signed in to change notification settings - Fork 609
feat(tdigest): add the support of TDIGEST.REVRANK command #3130
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
03b69e1
97df4a1
70a39d3
dde8410
0d3e9cc
bb172a8
a64add4
3954b1f
05d1202
f688e14
8bcad0f
46ac984
495e072
2b6785d
3af3b54
f3d85d3
e68689d
eb8674f
c70f410
b991d0d
4c9a41d
543fda0
e0d39a7
a4ed14c
9d6c532
ff658f8
53e82f8
6df3309
201afed
0851c22
c7ed36f
3a898fe
3711578
4b4f684
bd268b4
8e6a7f9
6662240
e7f06a2
367981c
4b8cd6a
07836fd
f44bc56
2aded75
f4a9c53
5023de8
e3629d9
ae05623
0cf8c8a
f855895
c6d5fc7
fbeb111
f7d4b43
62dfbb5
3471dac
8e700fc
e943ab8
95516b1
85e4c44
ac32080
79d2e33
625510d
cc01373
07b1a09
b14dee6
d385597
222dec1
97426ac
baeafed
9e97b6e
8a8592a
cc4f736
17d9d48
83fae63
54ad714
e3d85b4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,6 +22,8 @@ | |
|
|
||
| #include <fmt/format.h> | ||
|
|
||
| #include <map> | ||
| #include <numeric> | ||
| #include <vector> | ||
|
|
||
| #include "common/status.h" | ||
|
|
@@ -150,3 +152,92 @@ inline StatusOr<double> TDigestQuantile(TD&& td, double q) { | |
| diff /= (lc.weight / 2 + rc.weight / 2); | ||
| return Lerp(lc.mean, rc.mean, diff); | ||
| } | ||
|
|
||
// Three-way comparison of two doubles with a tolerance.
//
// Returns 0 when `a` and `b` are considered equal, -1 when `a` is smaller and 1 when `a` is larger.
// Two values are considered equal when they are exactly equal, when their absolute difference is at
// most `abs_eps`, or (for finite operands) when it is at most `rel_eps` times the larger magnitude.
//
// Infinities are ordered exactly: an infinity is equal only to the same infinity and is never
// "within tolerance" of any other value. If either operand is NaN every test below is false and the
// function returns 1.
inline int DoubleCompare(double a, double b, double rel_eps = 1e-12, double abs_eps = 1e-9) {
  // Exact equality first: this also covers equal infinities, whose difference would be NaN.
  if (a == b) return 0;

  double diff = a - b;
  double adiff = std::abs(diff);
  if (adiff <= abs_eps) return 0;

  double maxab = std::max(std::abs(a), std::abs(b));
  // With an infinite operand both `adiff` and `maxab * rel_eps` are +inf, and `inf <= inf` would
  // wrongly report equality, so the relative test is only meaningful for finite magnitudes.
  if (std::isfinite(maxab) && adiff <= maxab * rel_eps) return 0;

  return (diff < 0) ? -1 : 1;
}
|
|
||
| inline bool DoubleEqual(double a, double b, double rel_eps = 1e-12, double abs_eps = 1e-9) { | ||
| return DoubleCompare(a, b, rel_eps, abs_eps) == 0; | ||
| } | ||
|
|
||
| struct DoubleComparator { | ||
| bool operator()(const double& a, const double& b) const { return DoubleCompare(a, b) == -1; } | ||
| }; | ||
|
|
||
| template <typename TD> | ||
| inline Status TDigestRevRank(TD&& td, const std::vector<double>& inputs, std::vector<int>& result) { | ||
| std::map<double, size_t, DoubleComparator> value_to_indices; | ||
| for (size_t i = 0; i < inputs.size(); ++i) { | ||
| value_to_indices[inputs[i]] = i; | ||
| } | ||
|
Comment on lines
+175
to
+178
|
||
|
|
||
| result.clear(); | ||
| result.resize(inputs.size(), -2); | ||
| auto it = value_to_indices.rbegin(); | ||
|
|
||
| // handle inputs larger than maximum | ||
| while (it != value_to_indices.rend() && it->first > td.Max()) { | ||
| result[it->second] = -1; | ||
| ++it; | ||
| } | ||
|
|
||
| auto iter = td.End(); | ||
| double cumulative_weight = 0; | ||
| while (iter->Valid() && it != value_to_indices.rend()) { | ||
| auto centroid = GET_OR_RET(iter->GetCentroid()); | ||
| auto input_value = it->first; | ||
| if (DoubleEqual(centroid.mean, input_value)) { | ||
| auto current_mean = centroid.mean; | ||
| auto current_mean_cumulative_weight = cumulative_weight + centroid.weight / 2; | ||
| cumulative_weight += centroid.weight; | ||
|
|
||
| // handle all the previous centroids which has the same mean | ||
| while (!iter->IsBegin() && iter->Prev()) { | ||
| auto next_centroid = GET_OR_RET(iter->GetCentroid()); | ||
| if (!DoubleEqual(current_mean, next_centroid.mean)) { | ||
| // move back to the last equal centroid, because we will process it in the next loop | ||
| iter->Next(); | ||
| break; | ||
| } | ||
| current_mean_cumulative_weight += next_centroid.weight / 2; | ||
| cumulative_weight += next_centroid.weight; | ||
| } | ||
|
|
||
| // handle the prev inputs which have the same value | ||
| result[it->second] = static_cast<int>(current_mean_cumulative_weight); | ||
| ++it; | ||
| if (iter->IsBegin()) { | ||
| break; | ||
| } | ||
| iter->Prev(); | ||
| } else if (DoubleCompare(centroid.mean, input_value) > 0) { | ||
| cumulative_weight += centroid.weight; | ||
| if (iter->IsBegin()) { | ||
| break; | ||
| } | ||
| iter->Prev(); | ||
| } else { | ||
| result[it->second] = static_cast<int>(cumulative_weight); | ||
| ++it; | ||
| } | ||
| } | ||
|
|
||
| // handle inputs less than minimum | ||
| while (it != value_to_indices.rend()) { | ||
| result[it->second] = static_cast<int>(td.TotalWeight()); | ||
| ++it; | ||
| } | ||
|
|
||
| for (auto r : result) { | ||
| if (r <= -2) { | ||
| return Status{Status::InvalidArgument, "invalid result when computing revrank"}; | ||
| } | ||
| } | ||
| return Status::OK(); | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.