Skip to content

Commit 9fd4ddf

Browse files
committed
Python bindings SizeType update.
Added LInf metric to the Python bindings. Python code guide update. Documentation update.
1 parent 3bc678a commit 9fd4ddf

File tree

15 files changed

+310
-243
lines changed

15 files changed

+310
-243
lines changed

examples/benchmark/plot_benchmarks.py

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,48 @@
11
#!/usr/bin/env python
22

3-
import argparse
4-
from argparse import ArgumentParser
3+
from argparse import ArgumentParser, FileType
54
import json
65
import re
76
import matplotlib.pyplot as plt
87

98

109
def create_arguments():
1110
parser = ArgumentParser(description='benchmark visualization tool')
12-
parser.add_argument('-json_file', '-j', type=argparse.FileType('r'),
11+
parser.add_argument('-json_file', '-j', type=FileType('r'),
1312
required=True,
1413
help='JSON file with benchmark data')
1514

1615
return parser
1716

1817

19-
def get_benchmark_subset(benchmarks, pattern):
20-
return [x for x in benchmarks if pattern.match(x['name'])]
18+
def filter_benchmarks(benchmarks, filter_pattern):
19+
return [x for x in benchmarks if filter_pattern.match(x['name'])]
2120

2221

23-
def get_benchmarks(json):
22+
def filter_benchmark_categories(json):
2423
benchmarks = [x for x in json['benchmarks'] if x['name'].endswith('_mean')]
2524

2625
if not benchmarks:
2726
benchmarks = json['benchmarks']
2827

2928
return [
30-
get_benchmark_subset(benchmarks, re.compile(r'.+/Build.+')),
31-
get_benchmark_subset(benchmarks, re.compile(r'.+/(Knn|Nn).+')),
32-
get_benchmark_subset(benchmarks, re.compile(r'.+/Radius.+'))
29+
filter_benchmarks(benchmarks, re.compile(r'.+/Build.+')),
30+
filter_benchmarks(benchmarks, re.compile(r'.+/(Knn|Nn).+')),
31+
filter_benchmarks(benchmarks, re.compile(r'.+/Radius.+'))
3332
]
3433

3534

36-
def get_plots(benchmarks_subset, pattern):
35+
def create_plots(benchmarks, pattern):
3736
plots = dict()
3837

39-
for x in benchmarks_subset:
38+
for x in benchmarks:
4039
m = pattern.match(x['name'])
4140
k = m.group('tree') + '_' + \
4241
m.group('type') + (('_' + m.group('arg'))
4342
if m.group('arg') else '')
4443
plots[k] = {'x': [], 'y': []}
4544

46-
for x in benchmarks_subset:
45+
for x in benchmarks:
4746
m = pattern.match(x['name'])
4847
k = m.group('tree') + '_' + \
4948
m.group('type') + (('_' + m.group('arg'))
@@ -54,7 +53,7 @@ def get_plots(benchmarks_subset, pattern):
5453
return plots
5554

5655

57-
def get_figure(plots, title):
56+
def create_figure(plots, title):
5857
fig, ax = plt.subplots(figsize=(4, 4), tight_layout=True)
5958

6059
for label in plots:
@@ -74,17 +73,19 @@ def main():
7473
parser = create_arguments()
7574
args = parser.parse_args()
7675

77-
benchmarks = get_benchmarks(json.load(args.json_file))
76+
benchmarks = filter_benchmark_categories(json.load(args.json_file))
7877
re_info = r'^Bm(?P<tree>.+)/(Build|Knn|Nn|Radius)(?P<type>(Ct|Rt)[^/]*)/(?P<x>\d+)(/(?P<arg>\d+))?(_mean)?$'
79-
plots = [get_plots(b, re.compile(re_info)) for b in benchmarks]
80-
78+
plots = [create_plots(b, re.compile(re_info)) for b in benchmarks]
79+
titles = ['build time', 'knn search time', 'radius search time']
8180
# Format is determined by filename extension
8281
extension = '.png'
83-
get_figure(plots[0], 'build time')[0].savefig(f'./build_time{extension}')
84-
get_figure(plots[1], 'knn search time')[
85-
0].savefig(f'./knn_search_time{extension}')
86-
get_figure(plots[2], 'radius search time')[
87-
0].savefig(f'./radius_search_time{extension}')
82+
file_names = [
83+
f'./build_time{extension}',
84+
f'./knn_search_time{extension}',
85+
f'./radius_search_time{extension}']
86+
87+
for i in range(len(plots)):
88+
create_figure(plots[i], titles[i])[0].savefig(file_names[i])
8889
plt.show()
8990

9091

examples/kd_tree/kd_tree_dynamic_arrays.cpp

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,43 +8,46 @@
88
// for working with an array of scalars or an array of points.
99

1010
void ArrayOfScalars() {
11-
std::size_t count = 10;
11+
std::size_t count = 6;
1212
constexpr std::size_t Dim = 2;
1313

14-
// Dummy array of scalars.
15-
std::unique_ptr<double[]> data = std::make_unique<double[]>(count);
16-
for (std::size_t i = 0; i < count; ++i) {
14+
// Here we create an array of scalars that will be interpreted as a set of
15+
// points: {{0, 1}, {2, 3}, ...}
16+
std::unique_ptr<double[]> data = std::make_unique<double[]>(count * Dim);
17+
for (std::size_t i = 0; i < (count * Dim); ++i) {
1718
data[i] = static_cast<double>(i);
1819
}
1920

20-
// If Dim equals pico_tree::kDynamicSize, SpaceMap needs a 3rd argument: The
21-
// spatial dimension known at run time.
22-
pico_tree::SpaceMap<pico_tree::PointMap<double, Dim>> map(
23-
data.get(), count / Dim);
21+
// If Dim equals pico_tree::kDynamicSize, then SpaceMap will need a 3rd
22+
// argument: The spatial dimension known at run time.
23+
pico_tree::SpaceMap<pico_tree::PointMap<double, Dim>> map(data.get(), count);
2424

2525
std::size_t max_leaf_size = 3;
2626
pico_tree::KdTree<pico_tree::SpaceMap<pico_tree::PointMap<double, Dim>>> tree(
2727
map, max_leaf_size);
2828

29-
// If Dim equals pico_tree::kDynamicSize, PointMap needs a 2nd argument: The
30-
// spatial dimension known at run time.
31-
pico_tree::PointMap<double, Dim> query(data.get() + 4);
29+
// If Dim equals pico_tree::kDynamicSize, then PointMap will need a 2nd
30+
// argument: The spatial dimension known at run time.
31+
std::size_t index = 2;
32+
pico_tree::PointMap<double, Dim> query(data.get() + index * Dim);
3233
pico_tree::Neighbor<int, double> nn;
3334
tree.SearchNn(query, nn);
35+
3436
// Prints index 2.
3537
std::cout << "Index closest point: " << nn.index << std::endl;
3638
}
3739

3840
void ArrayOfPoints() {
39-
std::size_t count = 3;
41+
std::size_t count = 6;
4042
constexpr std::size_t Dim = 2;
4143

42-
// Dummy array of points.
44+
// Here we create an array of points: {{0, 1}, {2, 3}, ...}
4345
std::unique_ptr<std::array<double, Dim>[]> data =
4446
std::make_unique<std::array<double, Dim>[]>(count);
4547
for (std::size_t i = 0; i < count; ++i) {
46-
data[i] = {
47-
static_cast<double>(i * Dim + 0), static_cast<double>(i * Dim + 1)};
48+
for (std::size_t j = 0; j < Dim; ++j) {
49+
data[i][j] = static_cast<double>(i * Dim + j);
50+
}
4851
}
4952

5053
pico_tree::SpaceMap<std::array<double, Dim>> map(data.get(), count);
@@ -53,7 +56,8 @@ void ArrayOfPoints() {
5356
pico_tree::KdTree<pico_tree::SpaceMap<std::array<double, Dim>>> tree(
5457
map, max_leaf_size);
5558

56-
std::array<double, Dim> const& query = data[1];
59+
std::size_t index = 1;
60+
std::array<double, Dim> const& query = data[index];
5761
pico_tree::Neighbor<int, double> nn;
5862
tree.SearchNn(query, nn);
5963

examples/opencv/opencv.cpp

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,26 +11,27 @@ Index const kNumPoints = 1024 * 1024 * 2;
1111
Scalar const kArea = 1000.0;
1212
std::size_t const kRunCount = 1024 * 1024;
1313

14-
template <typename Scalar_>
15-
std::vector<cv::Point3_<Scalar_>> GenerateRandomPoint3N(int n, Scalar_ size) {
14+
template <typename Vec_>
15+
std::vector<Vec_> GenerateRandomVecN(
16+
std::size_t n, typename Vec_::value_type size) {
1617
std::random_device rd;
1718
std::mt19937 e2(rd());
18-
std::uniform_real_distribution<Scalar_> dist(0, size);
19+
std::uniform_real_distribution<typename Vec_::value_type> dist(0, size);
1920

20-
std::vector<cv::Point3_<Scalar_>> random(n);
21+
std::vector<Vec_> random(n);
2122
for (auto& p : random) {
22-
p.x = dist(e2);
23-
p.y = dist(e2);
24-
p.z = dist(e2);
23+
for (auto& c : p.val) {
24+
c = dist(e2);
25+
}
2526
}
2627

2728
return random;
2829
}
2930

3031
// This example shows how to build a KdTree from a vector of cv::Point3.
3132
void BasicVector() {
32-
using PointX = cv::Point3_<Scalar>;
33-
std::vector<PointX> random = GenerateRandomPoint3N(kNumPoints, kArea);
33+
using PointX = cv::Vec<Scalar, 3>;
34+
std::vector<PointX> random = GenerateRandomVecN<PointX>(kNumPoints, kArea);
3435

3536
pico_tree::KdTree<std::reference_wrapper<std::vector<PointX>>> tree(
3637
random, 10);
@@ -63,8 +64,8 @@ void BasicMatrix() {
6364

6465
// Single column cv::Mat based on a vector of points.
6566
{
66-
using PointX = cv::Point3_<Scalar>;
67-
std::vector<PointX> random = GenerateRandomPoint3N(kNumPoints, kArea);
67+
using PointX = cv::Vec<Scalar, 3>;
68+
std::vector<PointX> random = GenerateRandomVecN<PointX>(kNumPoints, kArea);
6869

6970
pico_tree::KdTree<pico_tree::MatWrapper<Scalar, 3>> tree(
7071
cv::Mat(random), 10);

examples/python/kd_tree.py

Lines changed: 50 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,12 @@
1111

1212
def tree_creation_and_query_types():
1313
print("*** KdTree Creation And Basic Information ***")
14-
# An input array must have a dimension of two and it must be contiguous. A
15-
# C contiguous array contains points in its rows and an F contiguous array
16-
# contains points in its columns.
14+
# An input array must have a dimension of two and it must be
15+
# contiguous. A C contiguous array contains points in its rows and
16+
# an F contiguous array contains points in its columns.
1717
p = np.array([[2, 1], [4, 3], [8, 7]], dtype=np.float32)
18-
# Both the in and output distances are squared when using Metric.L2Squared.
18+
# Both the in and output distances are squared when using
19+
# Metric.L2Squared.
1920
t = pt.KdTree(p, pt.Metric.L2Squared, 1)
2021
print(f"{t}")
2122
print(f"Number of points used to build the tree: {t.npts}")
@@ -37,9 +38,10 @@ def tree_creation_and_query_types():
3738
print()
3839

3940
print("*** Approximate Nearest Neighbor Search ***")
40-
# Searching for approximate nearest neighbors works the same way.
41-
# An approximate nearest neighbor can be at most a distance factor of 1+e
42-
# farther away from the true nearest neighbor.
41+
# Approximate nearest neighbor searches require an extra parameter
42+
# compared to exact nearest neighbor searches, namely, a distance
43+
# factor. An approximate nearest neighbor can be at most a factor
44+
# of 1+e farther away from the true nearest neighbor.
4345
max_error = 0.75
4446
# Apply the metric function to the ratio to get the squared ratio.
4547
max_error_ratio = t.metric(1.0 + max_error)
@@ -48,14 +50,14 @@ def tree_creation_and_query_types():
4850
# Note that we scale the ann distance back to its original distance.
4951
print("The 2nd closest to each input point:")
5052
for knn in knns:
51-
print(
52-
f"Point index {knn[1][0]} with distance {knn[1][1] * max_error_ratio}")
53+
print("Point index {0} with distance {1}".format(
54+
knn[1][0], knn[1][1] * max_error_ratio))
5355
print()
5456

5557
print("*** Radius Search ***")
56-
# A radius search doesn't return a numpy array but a custom vector of numpy
57-
# arrays. This is because the number of neighbors to each of input points
58-
# may vary for a radius search.
58+
# A radius search doesn't return a numpy array but a custom vector
59+
# of numpy arrays. This is because the number of neighbors to each
60+
# of the input points may vary for a radius search.
5961
search_radius = t.metric(2.5)
6062
print(f"Result with radius: {search_radius}")
6163
rnns = t.search_radius(p, search_radius)
@@ -69,8 +71,9 @@ def tree_creation_and_query_types():
6971
print()
7072

7173
print("*** Box Search ***")
72-
# A box search returns the same data structure as a radius search. However,
73-
# instead of containing neighbors it simply contains indices.
74+
# A box search returns the same data structure as a radius search.
75+
# However, instead of containing neighbors it simply contains
76+
# indices.
7477
min = np.array([[0, 0], [2, 2], [0, 0], [6, 6]], dtype=np.float32)
7578
max = np.array([[3, 3], [3, 3], [9, 9], [9, 9]], dtype=np.float32)
7679
bnns = t.search_box(min, max)
@@ -96,11 +99,14 @@ def tree_creation_and_query_types():
9699
def array_initialization():
97100
print("*** Array Initialization ***")
98101
p = np.array([[2, 1], [4, 3], [8, 7]], dtype=np.float64)
99-
# In and output distances are absolute distances when using Metric.L1.
102+
# Metric.L1: The sum of absolute differences.
100103
t = pt.KdTree(p, pt.Metric.L1, 10)
104+
# Metric.LInf: The max of absolute differences.
105+
t = pt.KdTree(p, pt.Metric.LInf, 10)
101106

102-
# This type of forward initialization of arrays may be useful to streamline
103-
# loops that depend on them and where reusing memory is desired. E.g.: ICP.
107+
# This type of forward initialization of arrays may be useful to
108+
# streamline loops that depend on them and where reusing memory is
109+
# desired. E.g.: ICP.
104110
knns = np.empty((0), dtype=t.dtype_neighbor)
105111
print(knns.dtype)
106112
rnns = pt.DArray(dtype=t.dtype_neighbor)
@@ -112,9 +118,9 @@ def array_initialization():
112118

113119
def performance_test_pico_tree():
114120
print("*** Performance against scans.bin ***")
115-
# The benchmark documentation, docs/benchmark.md section "Running a new
116-
# benchmark", explains how to generate a scans.bin file from an online
117-
# dataset.
121+
# The benchmark documentation, docs/benchmark.md section "Running a
122+
# new benchmark", explains how to generate a scans.bin file from an
123+
# online dataset.
118124
try:
119125
p0 = np.fromfile(Path(__file__).parent / "scans0.bin",
120126
np.float32).reshape((-1, 3))
@@ -125,34 +131,36 @@ def performance_test_pico_tree():
125131
return
126132

127133
cnt_build_time_before = perf_counter()
128-
# Tree creation is only slightly slower in Python vs C++ using the bindings.
134+
# Tree creation is only slightly slower in Python.
129135
t = pt.KdTree(p0, pt.Metric.L2Squared, 10)
130-
#t = spKDTree(p0, leafsize=10)
131-
#t = skKDTree(p0, leaf_size=10)
132-
#t = pyKDTree(p0, leafsize=10)
136+
# t = spKDTree(p0, leafsize=10)
137+
# t = skKDTree(p0, leaf_size=10)
138+
# t = pyKDTree(p0, leafsize=10)
133139
cnt_build_time_after = perf_counter()
134-
print(f"{t} was built in {(cnt_build_time_after - cnt_build_time_before) * 1000.0}ms")
135-
# Use the OMP_NUM_THREADS environment variable to influence the number of
136-
# threads used for querying: export OMP_NUM_THREADS=1
140+
print("{0} was built in {1}ms".format(
141+
t, (cnt_build_time_after - cnt_build_time_before) * 1000.0))
142+
# Use the OMP_NUM_THREADS environment variable to influence the
143+
# number of threads used for querying: export OMP_NUM_THREADS=1
137144
k = 1
138145
cnt_query_time_before = perf_counter()
139-
# Searching for nearest neighbors is a constant amount of time slower
140-
# using the bindings as compared to the C++ benchmark (regardless of k).
141-
# The following must be noted however: The Python benchmark simply calls
142-
# the knn function provided by the Python bindings. As such it does not
143-
# directly wrap the C++ benchmark. This means the performance difference is
144-
# not only due to the bindings overhead. The C++ implementation benchmark
145-
# may have been optimized more because is very simple. The bindings also
146-
# have various extra overhead: checks, numpy array memory creation, OpenMP,
147-
# etc.
148-
# TODO The actual overhead is probably very similar to that of the KdTree
149-
# creation, but it would be nice to measure the overhead w.r.t. the actual
150-
# query.
146+
# Searching for nearest neighbors is a constant amount of time
147+
# slower using the bindings as compared to the C++ benchmark
148+
# (regardless of k). The following must be noted however: The
149+
# Python benchmark simply calls the knn function provided by the
150+
# Python bindings. As such it does not directly wrap the C++
151+
# benchmark. This means the performance difference is not only due
152+
# to the bindings overhead. The C++ implementation benchmark may
153+
# have been optimized more because it is very simple. The bindings
154+
# also have various extra overhead: checks, numpy array memory
155+
# creation, OpenMP, etc.
156+
# TODO The actual overhead is probably very similar to that of the
157+
# KdTree creation, but it would be nice to measure the overhead
158+
# w.r.t. the actual query.
151159
unused_knns = t.search_knn(p1, k)
152-
#unused_dd, unused_ii = t.query(p1, k=k)
160+
# unused_dd, unused_ii = t.query(p1, k=k)
153161
cnt_query_time_after = perf_counter()
154-
print(
155-
f"{len(p1)} points queried in {(cnt_query_time_after - cnt_query_time_before) * 1000.0}ms")
162+
print("{0} points queried in {1}ms".format(
163+
len(p1), (cnt_query_time_after - cnt_query_time_before) * 1000.0))
156164
print()
157165

158166

src/pyco_tree/pico_tree/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
__all__ = []
1+
__all__ = ['DArray', 'KdTree', 'Metric']
22

3-
from .metric import *
4-
from .kd_tree import *
3+
from .kd_tree import KdTree
4+
from .metric import Metric
55
from ._pyco_tree import __doc__, DArray

src/pyco_tree/pico_tree/_pyco_tree/_pyco_tree.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,15 @@
99

1010
PYBIND11_MODULE(_pyco_tree, m) {
1111
m.doc() =
12-
"PicoTree is a module for nearest neighbor searches and range searches "
13-
"using a KdTree. It wraps the C++ PicoTree library.";
12+
R"ptdoc(
13+
PicoTree: a module for fast nearest neighbor and range searches using a
14+
KdTree. It wraps the C++ PicoTree library.
15+
)ptdoc";
1416

1517
// Registered dtypes.
1618
PYBIND11_NUMPY_DTYPE(pyco_tree::Neighborf, index, distance);
1719
PYBIND11_NUMPY_DTYPE(pyco_tree::Neighbord, index, distance);
1820

19-
pyco_tree::DefDArray(&m);
20-
pyco_tree::DefKdTree(&m);
21+
pyco_tree::DefDArray(m);
22+
pyco_tree::DefKdTree(m);
2123
}

0 commit comments

Comments
 (0)