Skip to content

Commit acca2d2

Browse files
authored
Take control over resizing to enforce required invariants (#383)
* Add test related to attribute retention * Take control over resizing to enforce required invariants * Tweak comment
1 parent 6ddb13a commit acca2d2

File tree

8 files changed

+195
-3
lines changed

8 files changed

+195
-3
lines changed

cpp11test/src/test-r_vector.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,4 +325,47 @@ context("r_vector-C++") {
325325
expect_true(x.data() != R_NilValue);
326326
expect_true(x.size() == 3);
327327
}
328+
329+
test_that(
330+
"writable vector truncation resizes names and retains attributes (but not dim or "
331+
"dim names)") {
332+
cpp11::writable::integers x(2);
333+
x[0] = 1;
334+
x[1] = 2;
335+
336+
// Doubles the capacity from 2 to 4, meaning the underlying SEXP has length 4 now.
337+
x.push_back(3);
338+
expect_true(Rf_xlength(x.data()) == 4);
339+
340+
// Set some names
341+
SEXP names = PROTECT(Rf_allocVector(STRSXP, 3));
342+
SET_STRING_ELT(names, 0, Rf_mkCharCE("x", CE_UTF8));
343+
SET_STRING_ELT(names, 1, Rf_mkCharCE("y", CE_UTF8));
344+
SET_STRING_ELT(names, 2, Rf_mkCharCE("z", CE_UTF8));
345+
x.names() = names;
346+
347+
// Length of names SEXP is actually 4 now, extended by `setAttrib()` to match
348+
// the internal capacity
349+
expect_true(Rf_xlength(Rf_getAttrib(x.data(), R_NamesSymbol)) == 4);
350+
351+
// Set an attribute
352+
SEXP bar = PROTECT(Rf_ScalarInteger(1));
353+
x.attr("foo") = bar;
354+
355+
// Extract out the underlying SEXP using the operator:
356+
// - This truncates to size 3
357+
// - This truncates and keeps names
358+
// - This copies over attributes like `"foo"`
359+
// - This updates the internal SEXP in `x` to the one in `x_sexp` (gross but users
360+
// probably expect this at this point)
361+
SEXP x_sexp = x;
362+
363+
expect_true(Rf_xlength(x_sexp) == 3);
364+
expect_true(Rf_xlength(Rf_getAttrib(x_sexp, R_NamesSymbol)) == 3);
365+
expect_true(Rf_getAttrib(x_sexp, Rf_install("foo")) == bar);
366+
367+
expect_true(x.data() == x_sexp);
368+
369+
UNPROTECT(2);
370+
}
328371
}

inst/include/cpp11/doubles.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@ inline typename r_vector<double>::underlying_type* r_vector<double>::get_p(bool
3737
}
3838
}
3939

40+
template <>
41+
inline typename r_vector<double>::underlying_type const* r_vector<double>::get_const_p(
42+
bool is_altrep, SEXP data) {
43+
return REAL_OR_NULL(data);
44+
}
45+
4046
template <>
4147
inline void r_vector<double>::get_region(SEXP x, R_xlen_t i, R_xlen_t n,
4248
typename r_vector::underlying_type* buf) {

inst/include/cpp11/integers.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ inline typename r_vector<int>::underlying_type* r_vector<int>::get_p(bool is_alt
3838
}
3939
}
4040

41+
template <>
42+
inline typename r_vector<int>::underlying_type const* r_vector<int>::get_const_p(
43+
bool is_altrep, SEXP data) {
44+
return INTEGER_OR_NULL(data);
45+
}
46+
4147
template <>
4248
inline void r_vector<int>::get_region(SEXP x, R_xlen_t i, R_xlen_t n,
4349
typename r_vector::underlying_type* buf) {

inst/include/cpp11/list.hpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,18 @@ inline typename r_vector<SEXP>::underlying_type* r_vector<SEXP>::get_p(bool, SEX
3030
return nullptr;
3131
}
3232

33+
template <>
34+
inline typename r_vector<SEXP>::underlying_type const* r_vector<SEXP>::get_const_p(
35+
bool is_altrep, SEXP data) {
36+
// No `VECTOR_PTR_OR_NULL()`
37+
if (is_altrep) {
38+
return nullptr;
39+
} else {
40+
// TODO: Use `VECTOR_PTR_RO()` conditionally once R 4.5.0 is officially released
41+
return static_cast<SEXP const*>(DATAPTR_RO(data));
42+
}
43+
}
44+
3345
/// Specialization for lists, where `x["oob"]` returns `R_NilValue`, like at the R level
3446
template <>
3547
inline SEXP r_vector<SEXP>::get_oob() {

inst/include/cpp11/logicals.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@ inline typename r_vector<r_bool>::underlying_type* r_vector<r_bool>::get_p(bool
3737
}
3838
}
3939

40+
template <>
41+
inline typename r_vector<r_bool>::underlying_type const* r_vector<r_bool>::get_const_p(
42+
bool is_altrep, SEXP data) {
43+
return LOGICAL_OR_NULL(data);
44+
}
45+
4046
template <>
4147
inline void r_vector<r_bool>::get_region(SEXP x, R_xlen_t i, R_xlen_t n,
4248
typename r_vector::underlying_type* buf) {

inst/include/cpp11/r_vector.hpp

Lines changed: 105 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <algorithm> // for max
66
#include <array> // for array
77
#include <cstdio> // for snprintf
8+
#include <cstring> // for memcpy
89
#include <exception> // for exception
910
#include <initializer_list> // for initializer_list
1011
#include <iterator> // for forward_iterator_tag, random_ac...
@@ -145,6 +146,8 @@ class r_vector {
145146
/// Implemented in specialization
146147
static underlying_type* get_p(bool is_altrep, SEXP data);
147148
/// Implemented in specialization
149+
static underlying_type const* get_const_p(bool is_altrep, SEXP data);
150+
/// Implemented in specialization
148151
static void get_region(SEXP x, R_xlen_t i, R_xlen_t n, underlying_type* buf);
149152
/// Implemented in specialization
150153
static SEXPTYPE get_sexptype();
@@ -311,8 +314,13 @@ class r_vector : public cpp11::r_vector<T> {
311314
/// Implemented in specialization
312315
static void set_elt(SEXP x, R_xlen_t i, underlying_type value);
313316

317+
static SEXP reserve_data(SEXP x, bool is_altrep, R_xlen_t size);
318+
static SEXP resize_data(SEXP x, bool is_altrep, R_xlen_t size);
319+
static SEXP resize_names(SEXP x, R_xlen_t size);
320+
314321
using cpp11::r_vector<T>::get_elt;
315322
using cpp11::r_vector<T>::get_p;
323+
using cpp11::r_vector<T>::get_const_p;
316324
using cpp11::r_vector<T>::get_sexptype;
317325
using cpp11::r_vector<T>::valid_type;
318326
using cpp11::r_vector<T>::valid_length;
@@ -759,8 +767,25 @@ inline r_vector<T>::r_vector(SEXP&& data, bool is_altrep)
759767
: cpp11::r_vector<T>(data, is_altrep), capacity_(length_) {}
760768

761769
template <typename T>
762-
inline r_vector<T>::r_vector(const r_vector& rhs)
763-
: cpp11::r_vector<T>(safe[Rf_shallow_duplicate](rhs)), capacity_(rhs.capacity_) {}
770+
inline r_vector<T>::r_vector(const r_vector& rhs) {
771+
// We don't want to just pass through to the read-only constructor because we'd
772+
// have to convert to `SEXP` first, which could truncate, and then we'd still have
773+
// to shallow duplicate after that to ensure we have a duplicate, which can result in
774+
// too many copies (#369).
775+
//
776+
// Instead we take control of setting all fields to try and only duplicate 1 time.
777+
// We try and reclaim unused capacity during the duplication by only reserving up to
778+
// the `rhs.length_`. This is nice because if the user returns this object, the
779+
// truncation has already been done and they don't have to pay for another allocation.
780+
// Importantly, `reserve_data()` always duplicates even if there wasn't extra capacity,
781+
// which ensures we have our own copy.
782+
data_ = reserve_data(rhs.data_, rhs.is_altrep_, rhs.length_);
783+
protect_ = detail::store::insert(data_);
784+
is_altrep_ = ALTREP(data_);
785+
data_p_ = get_p(is_altrep_, data_);
786+
length_ = rhs.length_;
787+
capacity_ = rhs.length_;
788+
}
764789

765790
template <typename T>
766791
inline r_vector<T>::r_vector(r_vector&& rhs) {
@@ -1048,7 +1073,7 @@ inline void r_vector<T>::reserve(R_xlen_t new_capacity) {
10481073
SEXP old_protect = protect_;
10491074

10501075
data_ = (data_ == R_NilValue) ? safe[Rf_allocVector](get_sexptype(), new_capacity)
1051-
: safe[Rf_xlengthgets](data_, new_capacity);
1076+
: reserve_data(data_, is_altrep_, new_capacity);
10521077
protect_ = detail::store::insert(data_);
10531078
is_altrep_ = ALTREP(data_);
10541079
data_p_ = get_p(is_altrep_, data_);
@@ -1249,6 +1274,83 @@ inline typename r_vector<T>::iterator r_vector<T>::iterator::operator+(R_xlen_t
12491274
return it;
12501275
}
12511276

1277+
// Compared to `Rf_xlengthgets()`:
1278+
// - This always allocates, even if it is the same size, which is important when we use
1279+
// it in a constructor and need to ensure that it duplicates on the way in.
1280+
// - This copies over attributes with `Rf_copyMostAttrib()`, which is important when we
1281+
// use it in constructors and when we truncate right before returning from the `SEXP`
1282+
// operator.
1283+
// - This is more friendly to ALTREP `x`.
1284+
template <typename T>
1285+
inline SEXP r_vector<T>::reserve_data(SEXP x, bool is_altrep, R_xlen_t size) {
1286+
// Resize core data
1287+
SEXP out = PROTECT(resize_data(x, is_altrep, size));
1288+
1289+
// Resize names, if required
1290+
SEXP names = Rf_getAttrib(x, R_NamesSymbol);
1291+
if (names != R_NilValue) {
1292+
names = resize_names(names, size);
1293+
Rf_setAttrib(out, R_NamesSymbol, names);
1294+
}
1295+
1296+
// Copy over "most" attributes, and set OBJECT bit and S4 bit as needed.
1297+
// Does not copy over names, dim, or dim names.
1298+
// Names are handled already. Dim and dim names should not be applicable,
1299+
// as this is a vector.
1300+
// Does not look like it would ever error in our use cases, so no `safe[]`.
1301+
Rf_copyMostAttrib(x, out);
1302+
1303+
UNPROTECT(1);
1304+
return out;
1305+
}
1306+
1307+
template <typename T>
1308+
inline SEXP r_vector<T>::resize_data(SEXP x, bool is_altrep, R_xlen_t size) {
1309+
underlying_type const* v_x = get_const_p(is_altrep, x);
1310+
1311+
SEXP out = PROTECT(safe[Rf_allocVector](get_sexptype(), size));
1312+
underlying_type* v_out = get_p(ALTREP(out), out);
1313+
1314+
const R_xlen_t x_size = Rf_xlength(x);
1315+
const R_xlen_t copy_size = (x_size > size) ? size : x_size;
1316+
1317+
// Copy over data from `x` up to `copy_size` (we could be truncating so don't blindly
1318+
// copy everything from `x`)
1319+
if (v_x != nullptr && v_out != nullptr) {
1320+
std::memcpy(v_out, v_x, copy_size * sizeof(underlying_type));
1321+
} else {
1322+
// Handles ALTREP `x` with no const pointer, VECSXP, STRSXP
1323+
for (R_xlen_t i = 0; i < copy_size; ++i) {
1324+
set_elt(out, i, get_elt(x, i));
1325+
}
1326+
}
1327+
1328+
UNPROTECT(1);
1329+
return out;
1330+
}
1331+
1332+
template <typename T>
1333+
inline SEXP r_vector<T>::resize_names(SEXP x, R_xlen_t size) {
1334+
const SEXP* v_x = STRING_PTR_RO(x);
1335+
1336+
SEXP out = PROTECT(safe[Rf_allocVector](STRSXP, size));
1337+
1338+
const R_xlen_t x_size = Rf_xlength(x);
1339+
const R_xlen_t copy_size = (x_size > size) ? size : x_size;
1340+
1341+
for (R_xlen_t i = 0; i < copy_size; ++i) {
1342+
SET_STRING_ELT(out, i, v_x[i]);
1343+
}
1344+
1345+
// Ensure remaining names are initialized to `""`
1346+
for (R_xlen_t i = copy_size; i < size; ++i) {
1347+
SET_STRING_ELT(out, i, R_BlankString);
1348+
}
1349+
1350+
UNPROTECT(1);
1351+
return out;
1352+
}
1353+
12521354
} // namespace writable
12531355

12541356
// TODO: is there a better condition we could use, e.g. assert something true

inst/include/cpp11/raws.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,12 @@ inline typename r_vector<uint8_t>::underlying_type r_vector<uint8_t>::get_elt(
3535
return RAW_ELT(x, i);
3636
}
3737

38+
template <>
39+
inline typename r_vector<uint8_t>::underlying_type const* r_vector<uint8_t>::get_const_p(
40+
bool is_altrep, SEXP data) {
41+
return RAW_OR_NULL(data);
42+
}
43+
3844
template <>
3945
inline typename r_vector<uint8_t>::underlying_type* r_vector<uint8_t>::get_p(
4046
bool is_altrep, SEXP data) {

inst/include/cpp11/strings.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,17 @@ inline typename r_vector<r_string>::underlying_type* r_vector<r_string>::get_p(b
3434
return nullptr;
3535
}
3636

37+
template <>
38+
inline typename r_vector<r_string>::underlying_type const*
39+
r_vector<r_string>::get_const_p(bool is_altrep, SEXP data) {
40+
// No `STRING_PTR_OR_NULL()`
41+
if (is_altrep) {
42+
return nullptr;
43+
} else {
44+
return STRING_PTR_RO(data);
45+
}
46+
}
47+
3748
template <>
3849
inline void r_vector<r_string>::get_region(SEXP x, R_xlen_t i, R_xlen_t n,
3950
typename r_vector::underlying_type* buf) {

0 commit comments

Comments
 (0)