Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: make stl.h list|set|map_caster more user friendly. #4686

Merged
merged 55 commits into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
d2a36d9
Add `test_pass_std_vector_int()`, `test_pass_std_set_int()` in test_stl
May 29, 2023
7d30378
Change `list_caster` to also accept generator objects (`PyGen_Check(s…
May 30, 2023
039d1b7
Drop in (currently unpublished) PyCLIF code, use in `list_caster`, ad…
Jun 5, 2023
bdba857
Use `PyObjectTypeIsConvertibleToStdSet()` in `set_caster`, adjust tests.
Jun 5, 2023
ec50ca0
Use `PyObjectTypeIsConvertibleToStdMap()` in `map_caster`, add tests.
Jun 5, 2023
eeb59af
Simplify `list_caster` `load()` implementation, push str/bytes check …
Jun 5, 2023
3631886
clang-tidy cleanup with a few extra `(... != 0)` to be more consistent.
Jun 5, 2023
b4f767b
Also use `PyObjectTypeIsConvertibleToStdVector()` in `array_caster`.
Jun 7, 2023
42e22d7
Merge branch 'master' into list_caster_pass_generator
Jun 7, 2023
9647a92
Merge branch 'master' into list_caster_pass_generator
Jun 27, 2023
1ed90cf
Merge branch 'master' into list_caster_pass_generator
Jul 14, 2023
3cc034b
Update comment pointing to clif/python/runtime.cc (code is unchanged).
Jul 14, 2023
18c4153
Merge branch 'master' into list_caster_pass_generator
Jul 14, 2023
e7330f2
Comprehensive test coverage, enhanced set_caster load implementation.
Jul 14, 2023
da29bf5
Resolve clang-tidy eror.
Jul 15, 2023
1fa338e
Add a long C++ comment explaining what led to the `PyObjectTypeIsConv…
Jul 15, 2023
6b5c780
Minor function name change in test.
Jul 15, 2023
9150a88
Merge branch 'master' into list_caster_pass_generator
Jul 15, 2023
62fda81
Merge branch 'master' into list_caster_pass_generator
Jul 17, 2023
4b25f74
strcmp -> std::strcmp (thanks @Skylion007 for catching this)
Jul 17, 2023
3cfc95b
Add `PyCallable_Check(items)` in `PyObjectTypeIsConvertibleToStdMap()`
Jul 17, 2023
59509f2
Resolve clang-tidy error
Jul 17, 2023
903faac
Merge branch 'master' into list_caster_pass_generator
Jul 17, 2023
4ab40d7
Use `PyMapping_Items()` instead of `src.attr("items")()`, to be inter…
Jul 17, 2023
5f7c9d4
Update link to PyCLIF sources.
Jul 17, 2023
ccfeb02
Fix typo (thanks @wangxf123456 for catching this)
Jul 17, 2023
6c1ec6a
Merge branch 'master' into list_caster_pass_generator
Jul 23, 2023
cc4d69c
Merge branch 'master' into list_caster_pass_generator
Aug 4, 2023
dacb827
Add `test_pass_std_vector_int()`, `test_pass_std_set_int()` in test_stl
May 29, 2023
3fc2d2a
Change `list_caster` to also accept generator objects (`PyGen_Check(s…
May 30, 2023
ff0724f
Drop in (currently unpublished) PyCLIF code, use in `list_caster`, ad…
Jun 5, 2023
be02291
Use `PyObjectTypeIsConvertibleToStdSet()` in `set_caster`, adjust tests.
Jun 5, 2023
0a79f55
Use `PyObjectTypeIsConvertibleToStdMap()` in `map_caster`, add tests.
Jun 5, 2023
ae95424
Simplify `list_caster` `load()` implementation, push str/bytes check …
Jun 5, 2023
8aa81d7
clang-tidy cleanup with a few extra `(... != 0)` to be more consistent.
Jun 5, 2023
4435ed0
Also use `PyObjectTypeIsConvertibleToStdVector()` in `array_caster`.
Jun 7, 2023
9aca5e8
Update comment pointing to clif/python/runtime.cc (code is unchanged).
Jul 14, 2023
9dc7d7c
Comprehensive test coverage, enhanced set_caster load implementation.
Jul 14, 2023
d9eeea8
Resolve clang-tidy eror.
Jul 15, 2023
9ac319b
Add a long C++ comment explaining what led to the `PyObjectTypeIsConv…
Jul 15, 2023
b834767
Minor function name change in test.
Jul 15, 2023
d19a0ff
strcmp -> std::strcmp (thanks @Skylion007 for catching this)
Jul 17, 2023
69a0da8
Add `PyCallable_Check(items)` in `PyObjectTypeIsConvertibleToStdMap()`
Jul 17, 2023
c86fa97
Resolve clang-tidy error
Jul 17, 2023
940e190
Use `PyMapping_Items()` instead of `src.attr("items")()`, to be inter…
Jul 17, 2023
f7725e4
Update link to PyCLIF sources.
Jul 17, 2023
fa72918
Fix typo (thanks @wangxf123456 for catching this)
Jul 17, 2023
a6b9a00
Merge branch 'list_caster_pass_generator' of https://github.com/rwgk/…
Aug 31, 2023
ac3ac4d
Merge branch 'master' into list_caster_pass_generator
Sep 12, 2023
87ff92a
Merge branch 'master' into list_caster_pass_generator
Nov 16, 2023
3ae34f3
Fix typo discovered by new version of codespell.
Nov 16, 2023
d1ffe4f
Merge branch 'master' into list_caster_pass_generator
Jul 31, 2024
2267e5c
Merge branch 'master' into list_caster_pass_generator
Aug 10, 2024
a38d0bd
Merge branch 'master' into list_caster_pass_generator
Aug 13, 2024
fbf41fd
Merge branch 'master' into list_caster_pass_generator
Aug 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 28 additions & 10 deletions include/pybind11/stl.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,13 +166,38 @@ struct list_caster {
using value_conv = make_caster<Value>;

bool load(handle src, bool convert) {
if (!isinstance<sequence>(src) || isinstance<bytes>(src) || isinstance<str>(src)) {
if (isinstance<bytes>(src) || isinstance<str>(src)) {
return false;
}
auto s = reinterpret_borrow<sequence>(src);
if (isinstance<sequence>(src)) {
return convert_elements(src, convert);
}
if (!convert) {
return false;
}
if (PyGen_Check(src.ptr())) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we really be supporting a py::iterable object instead? I think a generator would count here and simplify the code.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I definitely like the general thinking.

Please give me a little more time. I'm in the middle of what I am thinking of as "data driven cleanup".

PyCLIF is very far on the permissive side of the spectrum. — That's what's giving me the data: what is being used, how much.

pybind11 is very far on the strict side.

A glimpse on where I am right now: PyCLIF supports passing any iterable for a C++ set. I think that's going too far to the permissive side of the spectrum. So I'm going through cleaning that up, even though it's a lot of leg work. But I believe it makes the code safer and more readable.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just pushed the equivalent of the behavior I settled on for PyCLIF. I'm still working on rolling out the PyCLIF behavior changes internally. Only after that can I submit internally and then export the changed PyCLIF to github.

High-level: I'm converging the behaviors

  • by changing PyCLIF to be stricter & safer,
  • while this PR makes pybind11 more user friendly without crossing into unsafe territory.

Shouldn't we really be supporting a py::iterable object instead?

That's what PyCLIF used to do, but it has pitfalls that led to lots of unclean test code: Python {} (empty dict) passed as input for std::vector and std::set, [] as input for std::set and std::map.

The originally very permissive PyCLIF behavior gave me a fairly unique set of data: I could see what's being used a lot, what makes sense (IMO), and what doesn't make sense or seems unsafe (again IMO). The new code block near the top of stl.h, transferred here from PyCLIF, encodes my conclusions. With that the internal cleanup turned out to be relatively small, only around 50 changes I had to mail out. This is part of the change description for those (slightly edited):


Motivations for the behavior change:

  • Improves safety / prevents accidents:

    • For C++ std::set (and std::unordered_set): The potentially reducing conversion from a Python sequence (e.g. Python list or tuple) to a C++ set must be explicit.

    • For C++ std::vector: In very rare cases (itertools.chain) the conversion from a Python iterable must be explicit.

  • Improves readability: Python literals to be passed as C++ sets must be set literals.


I'll finish the rollout of the PyCLIF change, then come back here for polishing maybe and further expanding the tests. Please let me know any high-level suggestions.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PyCLIF behavior change (with prior cleanup around the codebase) is now complete: google/clif@30c5488

Internally I'm testing (globally) PyCLIF-pybind11 with this PR. Unfortunately some unrelated problem slipped in that I need to troubleshoot first, before I can convince myself that this PR actually fixes all pybind11/stl.h vs PyCLIF behavior differences.

// Designed to be behavior-equivalent to passing tuple(src) from Python:
// The conversion to a tuple will first exhaust the generator object, to ensure that
// the generator is not left in an unpredictable (to the caller) partially-consumed
// state.
assert(isinstance<iterable>(src));
return convert_elements(tuple(reinterpret_borrow<iterable>(src)), convert);
}
return false;
}

private:
template <typename T = Type, enable_if_t<has_reserve_method<T>::value, int> = 0>
void reserve_maybe(const sequence &s, Type *) {
value.reserve(s.size());
}
void reserve_maybe(const sequence &, void *) {}

bool convert_elements(handle seq, bool convert) {
auto s = reinterpret_borrow<sequence>(seq);
value.clear();
reserve_maybe(s, &value);
for (auto it : s) {
for (auto it : seq) {
value_conv conv;
if (!conv.load(it, convert)) {
return false;
Expand All @@ -182,13 +207,6 @@ struct list_caster {
return true;
}

private:
template <typename T = Type, enable_if_t<has_reserve_method<T>::value, int> = 0>
void reserve_maybe(const sequence &s, Type *) {
value.reserve(s.size());
}
void reserve_maybe(const sequence &, void *) {}

public:
template <typename T>
static handle cast(T &&src, return_value_policy policy, handle parent) {
Expand Down
3 changes: 3 additions & 0 deletions tests/test_stl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -548,4 +548,7 @@ TEST_SUBMODULE(stl, m) {
[]() { return new std::vector<bool>(4513); },
// Without explicitly specifying `take_ownership`, this function leaks.
py::return_value_policy::take_ownership);

m.def("pass_std_vector_int", [](const std::vector<int> &vec_int) { return vec_int.size(); });
m.def("pass_std_set_int", [](const std::set<int> &set_int) { return set_int.size(); });
}
38 changes: 38 additions & 0 deletions tests/test_stl.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,3 +379,41 @@ def test_return_vector_bool_raw_ptr():
v = m.return_vector_bool_raw_ptr()
assert isinstance(v, list)
assert len(v) == 4513


def test_pass_std_vector_int():
fn = m.pass_std_vector_int
assert fn([1, 2]) == 2
assert fn((1, 2)) == 2
assert fn(i for i in range(3)) == 3
with pytest.raises(TypeError):
fn(set())
with pytest.raises(TypeError):
fn({})
with pytest.raises(TypeError):
fn({}.keys())


def test_pass_std_set_int():
fn = m.pass_std_set_int
assert fn({1, 2}) == 2
with pytest.raises(TypeError):
fn([1, 2])
with pytest.raises(TypeError):
fn((1, 2))
with pytest.raises(TypeError):
fn({})
with pytest.raises(TypeError):
fn({}.keys())
with pytest.raises(TypeError):
fn(i for i in range(3))


def test_list_caster_fully_consumes_generator_object():
def gen_mix():
yield from [1, 2.0, 3]

gen_obj = gen_mix()
with pytest.raises(TypeError):
m.pass_std_vector_int(gen_obj)
assert not tuple(gen_obj)