Skip to content

Commit b4939fc

Browse files
jagerman and rwgk authored
Expand std::string_view support to str, bytes, memoryview (#3521)
* Expand string_view support to str, bytes, memoryview

  1. Allows constructing a str or bytes implicitly from a string_view; this is essentially a small shortcut allowing a caller to write `py::bytes{sv}` rather than `py::bytes{sv.data(), sv.size()}`.

  2. Allows implicit conversion *to* string_view from py::bytes -- this saves a fair bit more, as currently there is no simple way to get such a view of the bytes without copying it (or resorting to Python API calls).

     (This is not done for `str` because when the str contains unicode we have to allocate a temporary, and so there might not be any string data we can properly view without owning it.)

  3. Allows `memoryview::from_memory` to accept a string_view. As with the other from_memory calls, it's entirely your responsibility to keep it alive.

  This also required moving the string_view availability detection into detail/common.h because this PR needs it in pytypes.h, which is higher up the include chain than cast.h, where it was previously being detected.

* Move string_view include to pytypes.h

* CI-testing a fix for the "ambiguous conversion" issue.

  This change is known to fix the `tensorflow::tstring` issue reported under #3521 (comment)

  TODO: Minimal reproducer for the `tensorflow::tstring` issue.

* Make clang-tidy happy (hopefully).

* Adding minimal reproducer for the `tensorflow::tstring` issue.
  Error without the enable_if trick:

  ```
  /usr/local/google/home/rwgk/forked/pybind11/tests/test_builtin_casters.cpp:169:16: error: ambiguous conversion for functional-style cast from 'TypeWithBothOperatorStringAndStringView' to 'py::bytes'
          return py::bytes(TypeWithBothOperatorStringAndStringView());
                 ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  /usr/local/google/home/rwgk/forked/pybind11/include/pybind11/detail/../pytypes.h:1174:5: note: candidate constructor
      bytes(const std::string &s) : bytes(s.data(), s.size()) { }
      ^
  /usr/local/google/home/rwgk/forked/pybind11/include/pybind11/detail/../pytypes.h:1191:5: note: candidate constructor
      bytes(std::string_view s) : bytes(s.data(), s.size()) { }
      ^
  ```

* Adding missing NOLINTNEXTLINE

* Also apply ambiguous conversion workaround to str()

Co-authored-by: Ralf W. Grosse-Kunstleve <rwgk@google.com>
1 parent cd176ce commit b4939fc

File tree

5 files changed

+95
-17
lines changed

5 files changed

+95
-17
lines changed

include/pybind11/cast.h

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -27,23 +27,6 @@
2727
#include <utility>
2828
#include <vector>
2929

30-
#if defined(PYBIND11_CPP17)
31-
# if defined(__has_include)
32-
# if __has_include(<string_view>)
33-
# define PYBIND11_HAS_STRING_VIEW
34-
# endif
35-
# elif defined(_MSC_VER)
36-
# define PYBIND11_HAS_STRING_VIEW
37-
# endif
38-
#endif
39-
#ifdef PYBIND11_HAS_STRING_VIEW
40-
#include <string_view>
41-
#endif
42-
43-
#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
44-
# define PYBIND11_HAS_U8STRING
45-
#endif
46-
4730
PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
4831
PYBIND11_NAMESPACE_BEGIN(detail)
4932

include/pybind11/detail/common.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,21 @@
183183
# define PYBIND11_HAS_VARIANT 1
184184
#endif
185185

186+
#if defined(PYBIND11_CPP17)
187+
# if defined(__has_include)
188+
# if __has_include(<string_view>)
189+
# define PYBIND11_HAS_STRING_VIEW
190+
# endif
191+
# elif defined(_MSC_VER)
192+
# define PYBIND11_HAS_STRING_VIEW
193+
# endif
194+
#endif
195+
196+
#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
197+
# define PYBIND11_HAS_U8STRING
198+
#endif
199+
200+
186201
#include <Python.h>
187202
#include <frameobject.h>
188203
#include <pythread.h>

include/pybind11/pytypes.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@
1818
# include <optional>
1919
#endif
2020

21+
#ifdef PYBIND11_HAS_STRING_VIEW
22+
# include <string_view>
23+
#endif
24+
2125
PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
2226

2327
/* A few forward declarations */
@@ -1085,6 +1089,20 @@ class str : public object {
10851089
// NOLINTNEXTLINE(google-explicit-constructor)
10861090
str(const std::string &s) : str(s.data(), s.size()) { }
10871091

1092+
#ifdef PYBIND11_HAS_STRING_VIEW
1093+
// enable_if is needed to avoid "ambiguous conversion" errors (see PR #3521).
1094+
template <typename T, detail::enable_if_t<std::is_same<T, std::string_view>::value, int> = 0>
1095+
// NOLINTNEXTLINE(google-explicit-constructor)
1096+
str(T s) : str(s.data(), s.size()) { }
1097+
1098+
# ifdef PYBIND11_HAS_U8STRING
1099+
// reinterpret_cast here is safe (C++20 guarantees char8_t has the same size/alignment as char)
1100+
// NOLINTNEXTLINE(google-explicit-constructor)
1101+
str(std::u8string_view s) : str(reinterpret_cast<const char*>(s.data()), s.size()) { }
1102+
# endif
1103+
1104+
#endif
1105+
10881106
explicit str(const bytes &b);
10891107

10901108
/** \rst
@@ -1167,6 +1185,26 @@ class bytes : public object {
11671185
pybind11_fail("Unable to extract bytes contents!");
11681186
return std::string(buffer, (size_t) length);
11691187
}
1188+
1189+
#ifdef PYBIND11_HAS_STRING_VIEW
1190+
// enable_if is needed to avoid "ambiguous conversion" errors (see PR #3521).
1191+
template <typename T, detail::enable_if_t<std::is_same<T, std::string_view>::value, int> = 0>
1192+
// NOLINTNEXTLINE(google-explicit-constructor)
1193+
bytes(T s) : bytes(s.data(), s.size()) { }
1194+
1195+
// Obtain a string view that views the current `bytes` buffer value. Note that this is only
1196+
// valid so long as the `bytes` instance remains alive and so generally should not outlive the
1197+
// lifetime of the `bytes` instance.
1198+
// NOLINTNEXTLINE(google-explicit-constructor)
1199+
operator std::string_view() const {
1200+
char *buffer = nullptr;
1201+
ssize_t length = 0;
1202+
if (PYBIND11_BYTES_AS_STRING_AND_SIZE(m_ptr, &buffer, &length))
1203+
pybind11_fail("Unable to extract bytes contents!");
1204+
return {buffer, static_cast<size_t>(length)};
1205+
}
1206+
#endif
1207+
11701208
};
11711209
// Note: breathe >= 4.17.0 will fail to build docs if the below two constructors
11721210
// are included in the doxygen group; close here and reopen after as a workaround
@@ -1714,6 +1752,13 @@ class memoryview : public object {
17141752
static memoryview from_memory(const void *mem, ssize_t size) {
17151753
return memoryview::from_memory(const_cast<void*>(mem), size, true);
17161754
}
1755+
1756+
#ifdef PYBIND11_HAS_STRING_VIEW
1757+
static memoryview from_memory(std::string_view mem) {
1758+
return from_memory(const_cast<char*>(mem.data()), static_cast<ssize_t>(mem.size()), true);
1759+
}
1760+
#endif
1761+
17171762
#endif
17181763
};
17191764

tests/test_builtin_casters.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,11 +140,35 @@ TEST_SUBMODULE(builtin_casters, m) {
140140
m.def("string_view16_return", []() { return std::u16string_view(u"utf16 secret \U0001f382"); });
141141
m.def("string_view32_return", []() { return std::u32string_view(U"utf32 secret \U0001f382"); });
142142

143+
// The inner lambdas here are to also test implicit conversion
144+
using namespace std::literals;
145+
m.def("string_view_bytes", []() { return [](py::bytes b) { return b; }("abc \x80\x80 def"sv); });
146+
m.def("string_view_str", []() { return [](py::str s) { return s; }("abc \342\200\275 def"sv); });
147+
m.def("string_view_from_bytes", [](const py::bytes &b) { return [](std::string_view s) { return s; }(b); });
148+
#if PY_MAJOR_VERSION >= 3
149+
m.def("string_view_memoryview", []() {
150+
static constexpr auto val = "Have some \360\237\216\202"sv;
151+
return py::memoryview::from_memory(val);
152+
});
153+
#endif
154+
143155
# ifdef PYBIND11_HAS_U8STRING
144156
m.def("string_view8_print", [](std::u8string_view s) { py::print(s, s.size()); });
145157
m.def("string_view8_chars", [](std::u8string_view s) { py::list l; for (auto c : s) l.append((std::uint8_t) c); return l; });
146158
m.def("string_view8_return", []() { return std::u8string_view(u8"utf8 secret \U0001f382"); });
159+
m.def("string_view8_str", []() { return py::str{std::u8string_view{u8"abc ‽ def"}}; });
147160
# endif
161+
162+
struct TypeWithBothOperatorStringAndStringView {
163+
// NOLINTNEXTLINE(google-explicit-constructor)
164+
operator std::string() const { return "success"; }
165+
// NOLINTNEXTLINE(google-explicit-constructor)
166+
operator std::string_view() const { return "failure"; }
167+
};
168+
m.def("bytes_from_type_with_both_operator_string_and_string_view",
169+
[]() { return py::bytes(TypeWithBothOperatorStringAndStringView()); });
170+
m.def("str_from_type_with_both_operator_string_and_string_view",
171+
[]() { return py::str(TypeWithBothOperatorStringAndStringView()); });
148172
#endif
149173

150174
// test_integer_casting

tests/test_builtin_casters.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,17 @@ def test_string_view(capture):
206206
"""
207207
)
208208

209+
assert m.string_view_bytes() == b"abc \x80\x80 def"
210+
assert m.string_view_str() == u"abc ‽ def"
211+
assert m.string_view_from_bytes(u"abc ‽ def".encode("utf-8")) == u"abc ‽ def"
212+
if hasattr(m, "has_u8string"):
213+
assert m.string_view8_str() == u"abc ‽ def"
214+
if not env.PY2:
215+
assert m.string_view_memoryview() == "Have some 🎂".encode()
216+
217+
assert m.bytes_from_type_with_both_operator_string_and_string_view() == b"success"
218+
assert m.str_from_type_with_both_operator_string_and_string_view() == "success"
219+
209220

210221
def test_integer_casting():
211222
"""Issue #929 - out-of-range integer values shouldn't be accepted"""

0 commit comments

Comments
 (0)